From e5bd51ab62461bedf333b44a3a5efdc204f1ea09 Mon Sep 17 00:00:00 2001 From: Greg Burd Date: Fri, 5 Jun 2026 21:23:20 -0400 Subject: [PATCH 01/36] ci: hourly upstream sync from postgres/postgres MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Keep master a pristine mirror of upstream plus our .github/ CI. These workflows rebase the .github-only commits onto postgres/postgres and push via SYNC_PAT (a PAT carrying the 'workflow' scope — required because the default GITHUB_TOKEN cannot update files under .github/workflows/): - sync-upstream.yml (hourly schedule + manual dispatch) - sync-upstream-manual.yml (on-demand, with a force-push toggle) --- .github/workflows/sync-upstream-manual.yml | 252 ++++++++++++++++++++ .github/workflows/sync-upstream.yml | 257 +++++++++++++++++++++ 2 files changed, 509 insertions(+) create mode 100644 .github/workflows/sync-upstream-manual.yml create mode 100644 .github/workflows/sync-upstream.yml diff --git a/.github/workflows/sync-upstream-manual.yml b/.github/workflows/sync-upstream-manual.yml new file mode 100644 index 0000000000000..bb9c0b0c203a6 --- /dev/null +++ b/.github/workflows/sync-upstream-manual.yml @@ -0,0 +1,252 @@ +name: Sync from Upstream (Manual) + +on: + workflow_dispatch: + inputs: + force_push: + description: 'Use --force-with-lease when pushing' + required: false + type: boolean + default: true + +jobs: + sync: + runs-on: ubuntu-latest + permissions: + contents: write + issues: write + + steps: + - name: Checkout repository + uses: actions/checkout@v4 + with: + fetch-depth: 0 + # PAT with the 'workflow' scope. The default GITHUB_TOKEN cannot push + # changes under .github/workflows/ (upstream now ships pg-ci.yml), so + # the rebase+push would be rejected with a 'workflows permission' error. 
+ token: ${{ secrets.SYNC_PAT }} + + - name: Configure Git + run: | + git config user.name "github-actions[bot]" + git config user.email "github-actions[bot]@users.noreply.github.com" + + - name: Add upstream remote + run: | + git remote add upstream https://github.com/postgres/postgres.git || true + git remote -v + + - name: Fetch upstream + run: | + echo "Fetching from upstream postgres/postgres..." + git fetch upstream master + echo "Current local master:" + git log origin/master --oneline -5 + echo "Upstream master:" + git log upstream/master --oneline -5 + + - name: Check for local commits + id: check_commits + run: | + git checkout master + LOCAL_COMMITS=$(git rev-list origin/master..upstream/master --count) + DIVERGED=$(git rev-list upstream/master..origin/master --count) + echo "commits_behind=$LOCAL_COMMITS" >> $GITHUB_OUTPUT + echo "commits_ahead=$DIVERGED" >> $GITHUB_OUTPUT + echo "Mirror is $DIVERGED commits ahead and $LOCAL_COMMITS commits behind upstream" + + if [ "$DIVERGED" -gt 0 ]; then + # Check local-only commit messages (two-dot range, so upstream subjects are never scanned) for "dev setup" or "dev v" pattern + DEV_SETUP_COMMITS=$(git log --format=%s upstream/master..origin/master | grep -iE "^dev (setup|v[0-9])" | wc -l) + echo "dev_setup_commits=$DEV_SETUP_COMMITS" >> $GITHUB_OUTPUT + + # Check if diverged commits only touch .github/ directory + NON_GITHUB_CHANGES=$(git diff --name-only upstream/master...origin/master | grep -v "^\.github/" | wc -l) + echo "non_github_changes=$NON_GITHUB_CHANGES" >> $GITHUB_OUTPUT + + if [ "$NON_GITHUB_CHANGES" -eq 0 ]; then + echo "✓ All local commits are CI/CD configuration (.github/ only)" + elif [ "$DEV_SETUP_COMMITS" -gt 0 ]; then + echo "✓ Found $DEV_SETUP_COMMITS 'dev setup/version' commit(s)" + else + echo "⚠️ WARNING: Local commits modify files outside .github/ and are not 'dev setup/version' commits!" 
+ git diff --name-only upstream/master...origin/master | grep -v "^\.github/" || true + fi + else + echo "non_github_changes=0" >> $GITHUB_OUTPUT + echo "dev_setup_commits=0" >> $GITHUB_OUTPUT + fi + + - name: Attempt merge + id: merge + run: | + COMMITS_AHEAD=${{ steps.check_commits.outputs.commits_ahead }} + COMMITS_BEHIND=${{ steps.check_commits.outputs.commits_behind }} + NON_GITHUB_CHANGES=${{ steps.check_commits.outputs.non_github_changes }} + DEV_SETUP_COMMITS=${{ steps.check_commits.outputs.dev_setup_commits }} + + # Check if there are problematic local commits + # Allow commits if: + # 1. Only .github/ changes (CI/CD config) + # 2. Has "dev setup/version" commits (personal development environment) + if [ "$COMMITS_AHEAD" -gt 0 ] && [ "$NON_GITHUB_CHANGES" -gt 0 ]; then + if [ "$DEV_SETUP_COMMITS" -eq 0 ]; then + echo "❌ Local master has commits outside .github/ that are not 'dev setup/version' commits!" + echo "merge_status=conflict" >> $GITHUB_OUTPUT + exit 1 + else + echo "✓ Non-.github/ changes are from 'dev setup/version' commits - allowed" + fi + fi + + # Already up to date + if [ "$COMMITS_BEHIND" -eq 0 ]; then + echo "✓ Already up to date with upstream" + echo "merge_status=uptodate" >> $GITHUB_OUTPUT + exit 0 + fi + + # Try fast-forward first (clean case) + if [ "$COMMITS_AHEAD" -eq 0 ]; then + echo "Fast-forwarding to upstream (no local commits)..." + git merge --ff-only upstream/master + echo "merge_status=success" >> $GITHUB_OUTPUT + exit 0 + fi + + # Local commits exist (.github/ and/or dev setup/version) - rebase onto upstream + if [ "$DEV_SETUP_COMMITS" -gt 0 ]; then + echo "Rebasing local CI/CD and dev setup/version commits onto upstream..." + else + echo "Rebasing local CI/CD commits (.github/ only) onto upstream..." 
+ fi + + git config user.name "github-actions[bot]" + git config user.email "github-actions[bot]@users.noreply.github.com" + + if git rebase upstream/master; then + echo "✓ Successfully rebased local commits onto upstream" + echo "merge_status=success" >> $GITHUB_OUTPUT + else + echo "❌ Rebase conflict occurred" + echo "merge_status=conflict" >> $GITHUB_OUTPUT + + # Abort the failed rebase to clean up state + git rebase --abort + exit 1 + fi + continue-on-error: true + + - name: Push to origin + if: steps.merge.outputs.merge_status == 'success' + run: | + if [ "${{ inputs.force_push }}" == "true" ]; then + git push origin master --force-with-lease + else + git push origin master + fi + echo "✓ Successfully synced master with upstream" + + - name: Create issue on failure + if: steps.merge.outputs.merge_status == 'conflict' + uses: actions/github-script@v7 + with: + script: | + const title = '🚨 Upstream Sync Failed - Manual Intervention Required'; + const body = `## Sync Failure Report + + The automated sync from \`postgres/postgres\` failed due to conflicting commits. + + **Details:** + - Local master has ${{ steps.check_commits.outputs.commits_ahead }} commit(s) not in upstream + - Upstream has ${{ steps.check_commits.outputs.commits_behind }} new commit(s) + - Non-.github/ changes: ${{ steps.check_commits.outputs.non_github_changes }} files + + **This indicates commits were made directly to master outside .github/**, which violates the pristine mirror policy. + + **Note:** Commits to .github/ (CI/CD configuration) are allowed and will be preserved during sync. + + ### Resolution Steps: + + 1. Identify the conflicting commits: + \`\`\`bash + git fetch origin + git fetch https://github.com/postgres/postgres.git master:refs/remotes/upstream/master + git log upstream/master..origin/master + \`\`\` + + 2. 
If these commits should be preserved: + - Create a feature branch: \`git checkout -b recovery/master-commits origin/master\` + - Reset master: \`git checkout master && git reset --hard upstream/master\` + - Push: \`git push origin master --force\` + - Cherry-pick or rebase the feature branch + + 3. If these commits should be discarded: + - Reset master: \`git checkout master && git reset --hard upstream/master\` + - Push: \`git push origin master --force\` + + 4. Close this issue once resolved + + **Workflow run:** ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }} + `; + + // Check if issue already exists + const issues = await github.rest.issues.listForRepo({ + owner: context.repo.owner, + repo: context.repo.repo, + state: 'open', + labels: 'sync-failure' + }); + + if (issues.data.length === 0) { + await github.rest.issues.create({ + owner: context.repo.owner, + repo: context.repo.repo, + title: title, + body: body, + labels: ['sync-failure', 'automation'] + }); + } + + - name: Close existing sync-failure issues + if: steps.merge.outputs.merge_status == 'success' + uses: actions/github-script@v7 + with: + script: | + const issues = await github.rest.issues.listForRepo({ + owner: context.repo.owner, + repo: context.repo.repo, + state: 'open', + labels: 'sync-failure' + }); + + for (const issue of issues.data) { + await github.rest.issues.createComment({ + owner: context.repo.owner, + repo: context.repo.repo, + issue_number: issue.number, + body: '✓ Sync successful - closing this issue automatically.' 
+ }); + + await github.rest.issues.update({ + owner: context.repo.owner, + repo: context.repo.repo, + issue_number: issue.number, + state: 'closed' + }); + } + + - name: Summary + if: always() + run: | + echo "### Sync Summary" >> $GITHUB_STEP_SUMMARY + echo "- **Status:** ${{ steps.merge.outputs.merge_status }}" >> $GITHUB_STEP_SUMMARY + echo "- **Commits behind:** ${{ steps.check_commits.outputs.commits_behind }}" >> $GITHUB_STEP_SUMMARY + echo "- **Commits ahead:** ${{ steps.check_commits.outputs.commits_ahead }}" >> $GITHUB_STEP_SUMMARY + if [ "${{ steps.merge.outputs.merge_status }}" == "success" ]; then + echo "- **Result:** ✓ Successfully synced with upstream" >> $GITHUB_STEP_SUMMARY + elif [ "${{ steps.merge.outputs.merge_status }}" == "uptodate" ]; then + echo "- **Result:** ✓ Already up to date" >> $GITHUB_STEP_SUMMARY + else + echo "- **Result:** ⚠️ Sync failed - manual intervention required" >> $GITHUB_STEP_SUMMARY + fi diff --git a/.github/workflows/sync-upstream.yml b/.github/workflows/sync-upstream.yml new file mode 100644 index 0000000000000..39d5518702514 --- /dev/null +++ b/.github/workflows/sync-upstream.yml @@ -0,0 +1,257 @@ +name: Sync from Upstream (Automatic) + +on: + schedule: + # Run hourly every day + - cron: '0 * * * *' + workflow_dispatch: + +jobs: + sync: + runs-on: ubuntu-latest + permissions: + contents: write + issues: write + + steps: + - name: Checkout repository + uses: actions/checkout@v4 + with: + fetch-depth: 0 + # PAT with the 'workflow' scope. The default GITHUB_TOKEN cannot push + # changes under .github/workflows/ (upstream now ships pg-ci.yml), so + # the rebase+push would be rejected with a 'workflows permission' error. 
+ token: ${{ secrets.SYNC_PAT }} + + - name: Configure Git + run: | + git config user.name "github-actions[bot]" + git config user.email "github-actions[bot]@users.noreply.github.com" + + - name: Add upstream remote + run: | + git remote add upstream https://github.com/postgres/postgres.git || true + git remote -v + + - name: Fetch upstream + run: | + echo "Fetching from upstream postgres/postgres..." + git fetch upstream master + + - name: Check for local commits + id: check_commits + run: | + git checkout master + LOCAL_COMMITS=$(git rev-list origin/master..upstream/master --count) + DIVERGED=$(git rev-list upstream/master..origin/master --count) + echo "commits_behind=$LOCAL_COMMITS" >> $GITHUB_OUTPUT + echo "commits_ahead=$DIVERGED" >> $GITHUB_OUTPUT + + if [ "$LOCAL_COMMITS" -eq 0 ]; then + echo "✓ Already up to date with upstream" + else + echo "Mirror is $LOCAL_COMMITS commits behind upstream" + fi + + if [ "$DIVERGED" -gt 0 ]; then + echo "⚠️ Local master has $DIVERGED commits not in upstream" + + # Check commit messages for "dev setup" or "dev v" pattern + DEV_SETUP_COMMITS=$(git log --format=%s upstream/master..origin/master | grep -iE "^dev (setup|v[0-9])" | wc -l) + echo "dev_setup_commits=$DEV_SETUP_COMMITS" >> $GITHUB_OUTPUT + + # Check if diverged commits only touch .github/ directory + NON_GITHUB_CHANGES=$(git diff --name-only upstream/master...origin/master | grep -v "^\.github/" | wc -l) + echo "non_github_changes=$NON_GITHUB_CHANGES" >> $GITHUB_OUTPUT + + if [ "$NON_GITHUB_CHANGES" -eq 0 ]; then + echo "✓ All local commits are CI/CD configuration (.github/ only) - will merge" + elif [ "$DEV_SETUP_COMMITS" -gt 0 ]; then + echo "✓ Found $DEV_SETUP_COMMITS 'dev setup/version' commit(s)" + else + echo "⚠️ WARNING: Local commits modify files outside .github/ and are not 'dev setup/version' commits!" 
+ git diff --name-only upstream/master...origin/master | grep -v "^\.github/" || true + echo "Non-dev commits:" + git log --format=" %h %s" upstream/master..origin/master | grep -ivE "^ [a-f0-9]* dev (setup|v[0-9])" || true + fi + else + echo "non_github_changes=0" >> $GITHUB_OUTPUT + echo "dev_setup_commits=0" >> $GITHUB_OUTPUT + fi + + - name: Attempt merge + id: merge + run: | + COMMITS_AHEAD=${{ steps.check_commits.outputs.commits_ahead }} + COMMITS_BEHIND=${{ steps.check_commits.outputs.commits_behind }} + NON_GITHUB_CHANGES=${{ steps.check_commits.outputs.non_github_changes }} + DEV_SETUP_COMMITS=${{ steps.check_commits.outputs.dev_setup_commits }} + + # Check if there are problematic local commits + # Allow commits if: + # 1. Only .github/ changes (CI/CD config) + # 2. Has "dev setup/version" commits (personal development environment) + if [ "$COMMITS_AHEAD" -gt 0 ] && [ "$NON_GITHUB_CHANGES" -gt 0 ]; then + if [ "$DEV_SETUP_COMMITS" -eq 0 ]; then + echo "❌ Local master has commits outside .github/ that are not 'dev setup/version' commits!" + echo "merge_status=conflict" >> $GITHUB_OUTPUT + exit 1 + else + echo "✓ Non-.github/ changes are from 'dev setup/version' commits - allowed" + fi + fi + + # Already up to date + if [ "$COMMITS_BEHIND" -eq 0 ]; then + echo "✓ Already up to date with upstream" + echo "merge_status=uptodate" >> $GITHUB_OUTPUT + exit 0 + fi + + # Try fast-forward first (clean case) + if [ "$COMMITS_AHEAD" -eq 0 ]; then + echo "Fast-forwarding to upstream (no local commits)..." + git merge --ff-only upstream/master + echo "merge_status=success" >> $GITHUB_OUTPUT + exit 0 + fi + + # Local commits exist (.github/ and/or dev setup/version) - rebase onto upstream + if [ "$DEV_SETUP_COMMITS" -gt 0 ]; then + echo "Rebasing local CI/CD and dev setup/version commits onto upstream..." + else + echo "Rebasing local CI/CD commits (.github/ only) onto upstream..." 
+ fi + + git config user.name "github-actions[bot]" + git config user.email "github-actions[bot]@users.noreply.github.com" + + if git rebase upstream/master; then + echo "✓ Successfully rebased local commits onto upstream" + echo "merge_status=success" >> $GITHUB_OUTPUT + else + echo "❌ Rebase conflict occurred" + echo "merge_status=conflict" >> $GITHUB_OUTPUT + + # Abort the failed rebase to clean up state + git rebase --abort + exit 1 + fi + continue-on-error: true + + - name: Push to origin + if: steps.merge.outputs.merge_status == 'success' + run: | + git push origin master --force-with-lease + + COMMITS_SYNCED="${{ steps.check_commits.outputs.commits_behind }}" + echo "✓ Successfully synced $COMMITS_SYNCED commits from upstream" + + - name: Create issue on failure + if: steps.merge.outputs.merge_status == 'conflict' + uses: actions/github-script@v7 + with: + script: | + const title = '🚨 Automated Upstream Sync Failed'; + const body = `## Automatic Sync Failure + + The daily sync from \`postgres/postgres\` failed. + + **Details:** + - Local master has ${{ steps.check_commits.outputs.commits_ahead }} commit(s) not in upstream + - Upstream has ${{ steps.check_commits.outputs.commits_behind }} new commit(s) + - Non-.github/ changes: ${{ steps.check_commits.outputs.non_github_changes }} files + - **Run date:** ${new Date().toISOString()} + + **Root cause:** Commits were made directly to master outside of .github/, which violates the pristine mirror policy. + + **Note:** Commits to .github/ (CI/CD configuration) are allowed and will be preserved during sync. + + ### Resolution Steps: + + 1. Review the conflicting commits: + \`\`\`bash + git log upstream/master..origin/master --oneline + \`\`\` + + 2. Determine if commits should be: + - **Preserved:** Create feature branch and reset master + - **Discarded:** Hard reset master to upstream + + 3. 
Run the manual sync workflow after resolution to verify + + **Workflow run:** ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }} + `; + + // Check if issue already exists + const issues = await github.rest.issues.listForRepo({ + owner: context.repo.owner, + repo: context.repo.repo, + state: 'open', + labels: 'sync-failure' + }); + + if (issues.data.length === 0) { + await github.rest.issues.create({ + owner: context.repo.owner, + repo: context.repo.repo, + title: title, + body: body, + labels: ['sync-failure', 'automation', 'urgent'] + }); + } else { + // Update existing issue + await github.rest.issues.createComment({ + owner: context.repo.owner, + repo: context.repo.repo, + issue_number: issues.data[0].number, + body: `Sync failed again on ${new Date().toISOString()}\n\nWorkflow: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}` + }); + } + + - name: Close sync-failure issues + if: steps.merge.outputs.merge_status == 'success' + uses: actions/github-script@v7 + with: + script: | + const issues = await github.rest.issues.listForRepo({ + owner: context.repo.owner, + repo: context.repo.repo, + state: 'open', + labels: 'sync-failure' + }); + + for (const issue of issues.data) { + await github.rest.issues.createComment({ + owner: context.repo.owner, + repo: context.repo.repo, + issue_number: issue.number, + body: `✓ Automatic sync successful on ${new Date().toISOString()} - synced ${{ steps.check_commits.outputs.commits_behind }} commits.\n\nClosing issue automatically.` + }); + + await github.rest.issues.update({ + owner: context.repo.owner, + repo: context.repo.repo, + issue_number: issue.number, + state: 'closed' + }); + } + + - name: Summary + if: always() + run: | + echo "### Daily Sync Summary" >> $GITHUB_STEP_SUMMARY + echo "- **Date:** $(date -u)" >> $GITHUB_STEP_SUMMARY + echo "- **Status:** ${{ steps.merge.outputs.merge_status }}" >> $GITHUB_STEP_SUMMARY + echo "- **Commits synced:** ${{ 
steps.check_commits.outputs.commits_behind }}" >> $GITHUB_STEP_SUMMARY + + if [ "${{ steps.merge.outputs.merge_status }}" == "success" ]; then + echo "" >> $GITHUB_STEP_SUMMARY + echo "✓ Mirror successfully updated with upstream postgres/postgres" >> $GITHUB_STEP_SUMMARY + elif [ "${{ steps.merge.outputs.merge_status }}" == "uptodate" ]; then + echo "" >> $GITHUB_STEP_SUMMARY + echo "✓ Mirror already up to date" >> $GITHUB_STEP_SUMMARY + else + echo "" >> $GITHUB_STEP_SUMMARY + echo "⚠️ Sync failed - check created issue for details" >> $GITHUB_STEP_SUMMARY + fi From 440e64b53958f084da4ddb64d6883355e479b744 Mon Sep 17 00:00:00 2001 From: Greg Burd Date: Fri, 5 Jun 2026 21:23:20 -0400 Subject: [PATCH 02/36] =?UTF-8?q?ci:=20AI=20PR=20review=20=E2=80=94=20Open?= =?UTF-8?q?=20Code=20Review=20+=20Agora=20MCP=20history?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Review every PR (including drafts) with two jobs that authenticate to AWS Bedrock (Claude Opus 4.8) via GitHub OIDC (vars.AWS_ROLE_ARN); no static AWS credentials are stored in the repo. - ocr-review: runs Alibaba Open Code Review through an ephemeral LiteLLM proxy bridging OCR's OpenAI protocol to Bedrock, and posts inline review comments. Uses output_config.effort=xhigh (Opus 4.8 adaptive thinking). Path-scoped rules (.github/ocr/rule.json) encode PostgreSQL community review standards plus reviewer discipline (verify against the diff, don't hallucinate, state confidence, be blunt, accuracy over approval). - pg-history: OCR cannot call MCP, so a separate Bedrock tool-use agent (.github/ocr/pg-history.py) queries the Agora MCP server (pg.ddx.io) to tie the change to git + pgsql-hackers history, and upserts a comment linking threads as https://pg.ddx.io/m/pgsql-hackers/. 
--- .github/ocr/litellm.yaml | 41 ++++ .github/ocr/pg-history.py | 217 ++++++++++++++++++ .github/ocr/rule.json | 32 +++ .github/workflows/ocr-review.yml | 373 +++++++++++++++++++++++++++++++ 4 files changed, 663 insertions(+) create mode 100644 .github/ocr/litellm.yaml create mode 100644 .github/ocr/pg-history.py create mode 100644 .github/ocr/rule.json create mode 100644 .github/workflows/ocr-review.yml diff --git a/.github/ocr/litellm.yaml b/.github/ocr/litellm.yaml new file mode 100644 index 0000000000000..e23cc4eee6fe2 --- /dev/null +++ b/.github/ocr/litellm.yaml @@ -0,0 +1,41 @@ +# LiteLLM proxy config — bridges Open Code Review (OpenAI protocol) to AWS Bedrock. +# +# This proxy is NOT a hosted service. The ocr-review.yml workflow installs it +# (`pip install 'litellm[proxy]'`) and runs it as a background process bound to +# 127.0.0.1:4000 for the duration of a single GitHub Actions job, then it exits. +# +# Auth to Bedrock: LiteLLM uses boto3's default credential chain, which reads +# the temporary AWS_ACCESS_KEY_ID / AWS_SECRET_ACCESS_KEY / AWS_SESSION_TOKEN +# minted by the workflow's OIDC "Configure AWS credentials" step; region from +# AWS_REGION. + +model_list: + - model_name: ocr-bedrock + litellm_params: + # Set the repo variable OCR_BEDROCK_MODEL to an Opus inference-profile id + # your account has access to, e.g.: + # bedrock/converse/us.anthropic.claude-opus-4-8 + # The 'converse/' prefix uses Bedrock's Converse API, which is the most + # reliable path for Claude tool-use (what OCR relies on). + model: os.environ/OCR_BEDROCK_MODEL + aws_region_name: os.environ/AWS_REGION + + # "High effort" review. Claude Opus 4.8 on Bedrock uses *adaptive* thinking + # controlled by output_config.effort. Set it DIRECTLY here — NOT via + # reasoning_effort, which LiteLLM still maps to the legacy + # thinking.type.enabled that Opus 4.8 rejects. 
LiteLLM forwards + # output_config into additionalModelRequestFields for Anthropic models; if + # the build doesn't recognize the effort param it is dropped with a warning + # (no error) and the model reviews at its default effort. + # Valid: low|medium|high|max|xhigh (auto-clamped to the model ceiling). + output_config: + effort: xhigh + max_tokens: 32000 + +litellm_settings: + drop_params: true # silently drop params a model doesn't support + modify_params: true # auto-fix minor request incompatibilities + request_timeout: 600 + +general_settings: + master_key: os.environ/LITELLM_MASTER_KEY diff --git a/.github/ocr/pg-history.py b/.github/ocr/pg-history.py new file mode 100644 index 0000000000000..00ff59225b2b5 --- /dev/null +++ b/.github/ocr/pg-history.py @@ -0,0 +1,217 @@ +#!/usr/bin/env python3 +""" +pg-history: tie a PR's changes to PostgreSQL git + pgsql-hackers email history. + +OCR (the code reviewer) cannot call MCP servers, so this is a separate agent: +it runs a Bedrock (Claude Opus) tool-use loop wired to the Agora MCP server at +https://pg.ddx.io/mcp, lets the model search the mailing-list archives / commit +history / commitfest data, and emits a Markdown summary linking the changes to +the relevant threads (https://pg.ddx.io/m/pgsql-hackers/). + +Env: + PG_HISTORY_MCP_URL MCP endpoint (default https://pg.ddx.io/mcp) + PG_HISTORY_MODEL Bedrock model id (e.g. us.anthropic.claude-opus-4-8) + AWS_REGION region (creds come from the OIDC step's env) + BASE_REF, HEAD_SHA PR base ref and head sha (for the git diff context) + GH_PR_TITLE PR title (optional, adds context) + PG_HISTORY_OUT output markdown path (default /tmp/pg-history.md) +Writes the markdown to PG_HISTORY_OUT; exits 0 even on soft failures (writes a note). 
+""" +import json, os, subprocess, sys, urllib.request + +MCP_URL = os.environ.get("PG_HISTORY_MCP_URL", "https://pg.ddx.io/mcp") +MODEL = os.environ.get("PG_HISTORY_MODEL", "us.anthropic.claude-opus-4-8").replace("bedrock/converse/", "").replace("bedrock/", "") +REGION = os.environ.get("AWS_REGION", "us-east-1") +BASE_REF = os.environ.get("BASE_REF", "") +HEAD_SHA = os.environ.get("HEAD_SHA", "") +PR_TITLE = os.environ.get("GH_PR_TITLE", "") +OUT = os.environ.get("PG_HISTORY_OUT", "/tmp/pg-history.md") +UA = "pg-history/0.1 (+github-actions)" + +# Curated subset of the 108 Agora tools — the ones useful for connecting a +# change to its discussion/commit history. Intersected with what the server +# actually exposes, so unknown names are harmless. +TOOL_WHITELIST = { + "find_related_discussions", "find_similar_messages", "get_thread", + "discussion_links", "get_author_messages", "browse_by_date", + "blame_symbol", "check_upstream_status", "find_related", + "find_entries_for_thread", "find_entries_for_author", "get_commit", + "search", "hybrid_search", "get_callers", "get_callees", "find_pattern", +} +MAX_ROUNDS = 14 +TOOL_RESULT_CAP = 8000 # chars per tool result fed back to the model + + +def _mcp_post(body, sid=None): + headers = {"Content-Type": "application/json", + "Accept": "application/json, text/event-stream", "User-Agent": UA} + if sid: + headers["Mcp-Session-Id"] = sid + req = urllib.request.Request(MCP_URL, data=json.dumps(body).encode(), headers=headers, method="POST") + resp = urllib.request.urlopen(req, timeout=60) + sid_out = resp.headers.get("Mcp-Session-Id") + result = None + for line in resp.read().decode().splitlines(): + line = line.strip() + if line.startswith("data:"): + line = line[5:].strip() + if not line or line.startswith("event:"): + continue + try: + obj = json.loads(line) + except Exception: + continue + if isinstance(obj, dict) and ("result" in obj or "error" in obj): + result = obj + return result, sid_out + + +class MCP: + def 
__init__(self): + init, self.sid = _mcp_post({"jsonrpc": "2.0", "id": 1, "method": "initialize", + "params": {"protocolVersion": "2025-06-18", "capabilities": {}, + "clientInfo": {"name": "pg-history", "version": "0.1"}}}) + if not init or "result" not in init: + raise RuntimeError(f"MCP initialize failed: {init}") + try: + _mcp_post({"jsonrpc": "2.0", "method": "notifications/initialized", "params": {}}, self.sid) + except Exception: + pass + self._id = 1 + + def list_tools(self): + self._id += 1 + res, _ = _mcp_post({"jsonrpc": "2.0", "id": self._id, "method": "tools/list", "params": {}}, self.sid) + return (res or {}).get("result", {}).get("tools", []) + + def call(self, name, args): + self._id += 1 + res, _ = _mcp_post({"jsonrpc": "2.0", "id": self._id, "method": "tools/call", + "params": {"name": name, "arguments": args or {}}}, self.sid) + if not res: + return "(no response)" + if "error" in res: + return f"ERROR: {json.dumps(res['error'])[:500]}" + parts = [] + for c in res.get("result", {}).get("content", []): + if c.get("type") == "text": + parts.append(c["text"]) + return ("\n".join(parts) or "(empty)")[:TOOL_RESULT_CAP] + + +def git(*args): + try: + return subprocess.check_output(["git", *args], text=True, stderr=subprocess.DEVNULL).strip() + except Exception: + return "" + + +def pr_context(): + base = f"origin/{BASE_REF}" if BASE_REF else "" + rng = f"{base}..{HEAD_SHA}" if base and HEAD_SHA else HEAD_SHA + commits = git("log", "--no-merges", "--format=%h %s", f"{rng}") if rng else "" + stat = git("diff", "--stat", rng) if rng else "" + files = git("diff", "--name-only", rng) if rng else "" + return commits[:4000], stat[:3000], files[:2000] + + +SYSTEM = """You are a PostgreSQL community research assistant. Given a pull request's +commits and changed files, use the available tools (backed by the Agora index of +pgsql-hackers mail, commit history, and commitfest data) to connect the change to +its history. 
Your goal: + +- Find the mailing-list thread(s) and prior discussion behind this change. +- Identify related/superseded prior commits and any commitfest entry. +- Note relevant prior art, rejected approaches, or design rationale. + +Rules (voice & rigor): +- Be precise and blunt. No praise, no filler, no hedging, no disclaimers. Accuracy is + the only success metric — not the author's approval. Lead with the most important finding. +- NEVER hallucinate. Verify every Message-ID, thread subject, commit hash, author name, + and date against an actual tool result before citing it. If a search returns nothing, + say so plainly — do not guess or fabricate a plausible-looking link. +- Assess the change on its merits, independent of how the PR frames it. +- Tag any inferred (not tool-confirmed) linkage with an explicit confidence level: + high / moderate / low. +- Be decisive and efficient: a handful of targeted tool calls, not exhaustive search. +- Cite every mailing-list message as a Markdown link: [subject](https://pg.ddx.io/m/pgsql-hackers/MESSAGE_ID). +- If you find nothing relevant, say so in one line — do not pad. 
+ +When done, output ONLY Markdown (no preamble) with these sections, omitting any that are empty: +## 🧵 Related discussion +## 🔗 Related commits / prior art +## 📋 Commitfest +## 🧭 Context for reviewers +Keep it tight (use bullets; link generously).""" + + +def to_toolspec(t): + schema = t.get("inputSchema") or {"type": "object", "properties": {}} + return {"toolSpec": {"name": t["name"], + "description": (t.get("description") or "")[:600], + "inputSchema": {"json": schema}}} + + +def main(): + commits, stat, files = pr_context() + if not commits and not files: + open(OUT, "w").write("") # nothing to do + print("No PR diff context; skipping.") + return + user = (f"PR title: {PR_TITLE}\n\n" if PR_TITLE else "") + \ + f"Commits:\n{commits or '(none)'}\n\nChanged files:\n{files or '(none)'}\n\nDiffstat:\n{stat or '(none)'}\n" + + try: + mcp = MCP() + tools = [to_toolspec(t) for t in mcp.list_tools() if t.get("name") in TOOL_WHITELIST] + except Exception as e: + open(OUT, "w").write(f"_pg-history: could not reach the Agora MCP server ({MCP_URL}): {e}_\n") + print(f"MCP unavailable: {e}") + return + if not tools: + open(OUT, "w").write("_pg-history: no usable MCP tools available._\n") + return + + import boto3 + brt = boto3.client("bedrock-runtime", region_name=REGION) + messages = [{"role": "user", "content": [{"text": user}]}] + final_text = "" + try: + for _ in range(MAX_ROUNDS): + resp = brt.converse( + modelId=MODEL, + system=[{"text": SYSTEM}], + messages=messages, + toolConfig={"tools": tools}, + inferenceConfig={"maxTokens": 4096}, + ) + out = resp["output"]["message"] + messages.append(out) + if resp.get("stopReason") == "tool_use": + results = [] + for blk in out["content"]: + tu = blk.get("toolUse") + if not tu: + continue + res_text = mcp.call(tu["name"], tu.get("input") or {}) + results.append({"toolResult": {"toolUseId": tu["toolUseId"], + "content": [{"text": res_text}]}}) + messages.append({"role": "user", "content": results}) + continue + final_text = 
"".join(b.get("text", "") for b in out["content"]).strip() + break + except Exception as e: + open(OUT, "w").write(f"_pg-history: Bedrock call failed: {e}_\n") + print(f"Bedrock error: {e}") + return + + if not final_text: + final_text = "_pg-history: no related history found._" + body = "## 📜 Change history & discussion (Agora / pg.ddx.io)\n\n" + final_text + \ + "\n\nGenerated by pg-history via the Agora MCP server (pg.ddx.io).\n" + open(OUT, "w").write(body) + print(body) + + +if __name__ == "__main__": + main() diff --git a/.github/ocr/rule.json b/.github/ocr/rule.json new file mode 100644 index 0000000000000..de9e80712d4c7 --- /dev/null +++ b/.github/ocr/rule.json @@ -0,0 +1,32 @@ +{ + "rules": [ + { + "path": "src/test/regress/sql/**", + "rule": "REVIEW DISCIPLINE: Be precise and blunt; lead with the most serious problem and don't soften it. Verify every claim against the actual diff — confirm function names, signatures, line numbers, and APIs before asserting; never invent behavior or cite code not present in the change. If unsure, say so instead of guessing, and tag each finding's confidence (high/moderate/low). No praise, no validating the author, no disclaimers; accuracy is the only success metric. Judge the change on its merits regardless of how it is framed. PostgreSQL regression test (.sql). Require deterministic, portable output: ORDER BY where row order matters; no timing/plan-dependent output except intentional EXPLAIN tests; no absolute paths; locale-independent (C collation or explicit COLLATE); DROP objects the test creates. Confirm the matching expected/ output stays stable across platforms (Windows/Linux/BSD) and the parallel schedule. New tests should cover edge cases (NULL, empty sets, boundary/overflow values) and error paths, not just the happy path." + }, + { + "path": "**/*.{sql,pgsql}", + "rule": "REVIEW DISCIPLINE: Be precise and blunt; lead with the most serious problem and don't soften it. 
Verify every claim against the actual diff — confirm function names, signatures, line numbers, and APIs before asserting; never invent behavior or cite code not present in the change. If unsure, say so instead of guessing, and tag each finding's confidence (high/moderate/low). No praise, no validating the author, no disclaimers; accuracy is the only success metric. Judge the change on its merits regardless of how it is framed. PostgreSQL SQL. Valid PostgreSQL dialect (not MySQL/Oracle); correct types (BIGINT vs INT, TEXT vs VARCHAR); sound transaction/isolation and CTE-materialization assumptions. SECURITY: flag SQL injection in dynamic SQL (require quote_identifier/quote_literal or format() with %I/%L), SECURITY DEFINER functions without a locked-down search_path, and inappropriate RLS bypass. Prefer set-based over N+1. BACKWARDS COMPATIBILITY (a top PostgreSQL rejection reason): changing the result/behavior of existing SQL, output of existing functions, or default GUCs needs extraordinary justification." + }, + { + "path": "**/*.{c,h}", + "rule": "REVIEW DISCIPLINE: Be precise and blunt; lead with the most serious problem and don't soften it. Verify every claim against the actual diff — confirm function names, signatures, line numbers, and APIs before asserting; never invent behavior or cite code not present in the change. If unsure, say so instead of guessing, and tag each finding's confidence (high/moderate/low). No praise, no validating the author, no disclaimers; accuracy is the only success metric. Judge the change on its merits regardless of how it is framed. PostgreSQL backend C. 
Review the way pgsql-hackers does, roughly in this priority order.\n\n(1) CORRECTNESS — highest priority: memory safety (every palloc has a matching pfree or a documented MemoryContext lifetime; error paths via ereport/elog(ERROR) must not leak memory, buffers, locks, or fds — rely on the right MemoryContext/ResourceOwner or PG_TRY/PG_CATCH; no use-after-free; temp contexts deleted). Concurrency: consistent lock ordering (deadlock-free), correct lock levels, balanced START_CRIT_SECTION/END_CRIT_SECTION, spinlock/LWLock for shared state, no TOCTOU races, signal/interrupt safety (CHECK_FOR_INTERRUPTS), and WAL changes that are logged AND correctly replayed. NULL handling and edge cases (empty/zero rows, max values, overflow).\n\n(2) BACKWARDS COMPATIBILITY — the strongest PostgreSQL constraint: don't break behavioral compatibility, dump/restore, pg_upgrade, the libpq wire protocol, logical-replication protocol, or exported APIs without deprecation. Flag any such break for extraordinary justification.\n\n(3) CATALOG CHANGES: any change to system catalog contents/structure must bump CATALOG_VERSION_NO in src/include/catalog/catversion.h and handle pg_upgrade. New Node fields need copy/equal/out/read func updates.\n\n(4) PERFORMANCE: no regression on common paths; avoid O(n^2) where O(n log n)/O(n) is feasible; minimize work under contended locks; avoid needless palloc churn and large struct copies in hot paths.\n\n(5) SECURITY: bounds on string ops (snprintf/strlcpy, never strcpy/sprintf), integer/size-overflow checks, never user input as a format string, privilege checks via pg_*_aclcheck.\n\n(6) CONVENTIONS: error messages = lowercase start, no trailing period, correct ERRCODE_*, primary vs errdetail/errhint split; Assert() only for can't-happen invariants; naming (snake_case funcs like heap_insert with subsystem prefix, or CamelCase for major subsystems like ExecInitNode; ALL_CAPS macros); code must pgindent cleanly (tabs to indent, width 4). 
Beware over-engineering/speculative abstraction and reimplementing existing helpers — the community prefers minimal, targeted changes that fit the subsystem's existing patterns." + }, + { + "path": "**/{meson.build,meson_options.txt}", + "rule": "REVIEW DISCIPLINE: Be precise and blunt; lead with the most serious problem and don't soften it. Verify every claim against the actual diff — confirm function names, signatures, line numbers, and APIs before asserting; never invent behavior or cite code not present in the change. If unsure, say so instead of guessing, and tag each finding's confidence (high/moderate/low). No praise, no validating the author, no disclaimers; accuracy is the only success metric. Judge the change on its merits regardless of how it is framed. PostgreSQL Meson build. Valid meson syntax; correct subdir()/dependency declarations and install paths; any new option mirrors the equivalent Autoconf/configure feature and stays in sync with the Makefile build so the two don't drift." + }, + { + "path": "**/{Makefile,GNUmakefile,*.mk}", + "rule": "REVIEW DISCIPLINE: Be precise and blunt; lead with the most serious problem and don't soften it. Verify every claim against the actual diff — confirm function names, signatures, line numbers, and APIs before asserting; never invent behavior or cite code not present in the change. If unsure, say so instead of guessing, and tag each finding's confidence (high/moderate/low). No praise, no validating the author, no disclaimers; accuracy is the only success metric. Judge the change on its merits regardless of how it is framed. PostgreSQL Makefile. GNU Make syntax with $(VAR) refs; correct .PHONY; accurate deps (no parallel-build races); $(MAKE) for recursion; VPATH/out-of-tree build support; no hardcoded paths (use PostgreSQL's standard vars); clean/distclean/maintainer-clean handle new artifacts; extensions use PGXS. Keep in sync with meson.build." 
+ }, + { + "path": "doc/**/*.sgml", + "rule": "REVIEW DISCIPLINE: Be precise and blunt; lead with the most serious problem and don't soften it. Verify every claim against the actual diff — confirm function names, signatures, line numbers, and APIs before asserting; never invent behavior or cite code not present in the change. If unsure, say so instead of guessing, and tag each finding's confidence (high/moderate/low). No praise, no validating the author, no disclaimers; accuracy is the only success metric. Judge the change on its merits regardless of how it is framed. PostgreSQL documentation (DocBook SGML). Technically accurate and complete (params, limitations, version/compat notes); correct tag usage and nesting (, , , , , /); working cross-references; spell it 'PostgreSQL' in prose; SQL keywords uppercase in examples; commands/literals/filenames in the right tags. New user-facing behavior in code should come with matching doc changes." + }, + { + "path": "**/*.md", + "rule": "REVIEW DISCIPLINE: Be precise and blunt; lead with the most serious problem and don't soften it. Verify every claim against the actual diff — confirm function names, signatures, line numbers, and APIs before asserting; never invent behavior or cite code not present in the change. If unsure, say so instead of guessing, and tag each finding's confidence (high/moderate/low). No praise, no validating the author, no disclaimers; accuracy is the only success metric. Judge the change on its merits regardless of how it is framed. Markdown docs. Clear heading hierarchy; fenced code blocks with language hints; accurate instructions/prerequisites; consistent PostgreSQL terminology; no broken relative links or stale claims." 
+ } + ] +} diff --git a/.github/workflows/ocr-review.yml b/.github/workflows/ocr-review.yml new file mode 100644 index 0000000000000..f6e8339f6bb2e --- /dev/null +++ b/.github/workflows/ocr-review.yml @@ -0,0 +1,373 @@ +# Open Code Review (OCR) — AI PR review backed by AWS Bedrock via a LiteLLM proxy. +# +# Flow: +# PR opened/updated (incl. DRAFTS) ─┐ +# /open-code-review PR comment ─┼─► start LiteLLM (127.0.0.1:4000 → Bedrock) +# manual workflow_dispatch ─┘ └► ocr review --format json +# └► post inline PR review comments +# +# Required (repo settings — all repo *variables*, no secrets; auth is via GitHub OIDC): +# vars.AWS_ROLE_ARN - IAM role to assume via OIDC (granting bedrock:InvokeModel*) +# vars.AWS_REGION - e.g. us-east-1 +# vars.OCR_BEDROCK_MODEL - LiteLLM model string for the Opus inference profile, e.g. +# bedrock/converse/us.anthropic.claude-opus-4-8 +# +# No static AWS keys are stored. GITHUB_TOKEN (auto) posts the review comments. + +name: OCR AI Review + +on: + pull_request: + # Note: no draft filter — drafts are reviewed too. + types: [opened, synchronize, reopened, ready_for_review] + issue_comment: + types: [created] + workflow_dispatch: + inputs: + pr_number: + description: 'PR number to review' + required: true + type: number + +# One review per PR; cancel superseded runs to save Bedrock spend. +concurrency: + group: ocr-review-${{ github.event.pull_request.number || github.event.issue.number || github.event.inputs.pr_number }} + cancel-in-progress: true + +permissions: + id-token: write # required to mint the GitHub OIDC token for AWS role assumption + contents: read + pull-requests: write + +jobs: + ocr-review: + runs-on: ubuntu-latest + # PR events always; comment events only when the comment is on a PR and + # starts with the trigger keyword; manual dispatch always. 
+ if: | + github.event_name == 'pull_request' || + github.event_name == 'workflow_dispatch' || + (github.event_name == 'issue_comment' && github.event.issue.pull_request && + (startsWith(github.event.comment.body, '/open-code-review') || + startsWith(github.event.comment.body, '@open-code-review'))) + + env: + # LiteLLM listens on localhost only; this key never leaves the runner. + LITELLM_MASTER_KEY: sk-ocr-ci-local + OCR_BEDROCK_MODEL: ${{ vars.OCR_BEDROCK_MODEL }} + # Region is a static var (safe at job level). AWS credentials are NOT set + # here — they're minted by the OIDC "Configure AWS credentials" step below + # and exported to the environment for the LiteLLM/boto3 Bedrock calls. + AWS_REGION: ${{ vars.AWS_REGION }} + + steps: + - name: Resolve PR context + id: pr + uses: actions/github-script@v9 + with: + script: | + let prNumber; + if (context.eventName === 'pull_request') { + prNumber = context.payload.pull_request.number; + } else if (context.eventName === 'issue_comment') { + prNumber = context.issue.number; + } else { + prNumber = parseInt('${{ github.event.inputs.pr_number }}', 10); + } + const { data: pr } = await github.rest.pulls.get({ + owner: context.repo.owner, + repo: context.repo.repo, + pull_number: prNumber, + }); + const { data: repo } = await github.rest.repos.get({ + owner: context.repo.owner, + repo: context.repo.repo, + }); + core.setOutput('number', String(prNumber)); + core.setOutput('base_ref', pr.base.ref); + core.setOutput('head_ref', pr.head.ref); + core.setOutput('head_sha', pr.head.sha); + core.setOutput('default_branch', repo.default_branch); + core.setOutput('cross_repo', String(pr.head.repo.full_name !== pr.base.repo.full_name)); + + # NOTE: do NOT checkout the PR head. OCR reads the diff and file contents + # straight from git refs (git diff , git show :path, + # git grep ), so the working tree is irrelevant — but our OCR config + # lives on the default branch, not on the PR branch. 
We check out the repo + # (default ref), fetch the base/head objects, and materialize the config + # from origin/. + - name: Checkout + uses: actions/checkout@v6 + with: + fetch-depth: 0 + + - name: Prepare git refs and OCR config + env: + BASE_REF: ${{ steps.pr.outputs.base_ref }} + HEAD_REF: ${{ steps.pr.outputs.head_ref }} + HEAD_SHA: ${{ steps.pr.outputs.head_sha }} + DEFAULT_BRANCH: ${{ steps.pr.outputs.default_branch }} + run: | + git fetch --no-tags origin "+refs/heads/${DEFAULT_BRANCH}:refs/remotes/origin/${DEFAULT_BRANCH}" || true + git fetch --no-tags origin "+refs/heads/${BASE_REF}:refs/remotes/origin/${BASE_REF}" || true + git fetch --no-tags origin "+refs/heads/${HEAD_REF}:refs/remotes/origin/${HEAD_REF}" || true + git fetch --no-tags origin "${HEAD_SHA}" || true + + # OCR config lives on the default branch; materialize it independently + # of whatever ref is checked out. + mkdir -p "$RUNNER_TEMP/ocr" + git show "origin/${DEFAULT_BRANCH}:.github/ocr/litellm.yaml" > "$RUNNER_TEMP/ocr/litellm.yaml" + git show "origin/${DEFAULT_BRANCH}:.github/ocr/rule.json" > "$RUNNER_TEMP/ocr/rule.json" + echo "Config materialized:"; ls -l "$RUNNER_TEMP/ocr" + + - name: Setup Python + uses: actions/setup-python@v6 + with: + python-version: '3.12' + + - name: Setup Node.js + uses: actions/setup-node@v6 + with: + node-version: '20' + + - name: Install LiteLLM proxy + Open Code Review + run: | + python -m pip install --upgrade pip + # Pin LiteLLM to a main commit that supports Claude Opus 4.8 adaptive + # thinking (maps reasoning_effort -> output_config.effort, incl. xhigh). + # Not in any tagged release yet (PyPI latest 1.87.1 lacks the Opus + # normalizer). Bump this SHA once a release ships the feature. 
+ pip install "litellm[proxy] @ git+https://github.com/BerriAI/litellm.git@5be0797d24a2f26eb2123e13788f90055a59d91d" + npm install -g @alibaba-group/open-code-review + + - name: Configure AWS credentials (OIDC) + uses: aws-actions/configure-aws-credentials@v6 + with: + role-to-assume: ${{ vars.AWS_ROLE_ARN }} + aws-region: ${{ vars.AWS_REGION }} + role-session-name: ocr-review-${{ github.run_id }} + + - name: Start LiteLLM proxy (Bedrock bridge) + run: | + if [ -z "$OCR_BEDROCK_MODEL" ]; then + echo "::error::vars.OCR_BEDROCK_MODEL is not set (e.g. bedrock/converse/us.anthropic.claude-opus-4-1-20250805-v1:0)" + exit 1 + fi + nohup litellm --config "$RUNNER_TEMP/ocr/litellm.yaml" --host 127.0.0.1 --port 4000 \ + > /tmp/litellm.log 2>&1 & + echo "Waiting for LiteLLM to become ready..." + for i in $(seq 1 60); do + if curl -sf http://127.0.0.1:4000/health/readiness >/dev/null; then + echo "LiteLLM ready."; exit 0 + fi + sleep 2 + done + echo "::error::LiteLLM did not become ready in time"; cat /tmp/litellm.log; exit 1 + + - name: Configure OCR + run: | + ocr config set llm.url http://127.0.0.1:4000/v1/chat/completions + ocr config set llm.auth_token "$LITELLM_MASTER_KEY" + ocr config set llm.model ocr-bedrock + ocr config set llm.use_anthropic false + ocr config set language English + + - name: Run OCR review + run: | + ocr review \ + --from "origin/${{ steps.pr.outputs.base_ref }}" \ + --to "${{ steps.pr.outputs.head_sha }}" \ + --rule "$RUNNER_TEMP/ocr/rule.json" \ + --concurrency 3 \ + --timeout 20 \ + --format json \ + > /tmp/ocr-result.json 2>/tmp/ocr-stderr.log || true + echo "----- OCR stdout -----"; cat /tmp/ocr-result.json || true + echo "----- OCR stderr -----"; cat /tmp/ocr-stderr.log || true + echo "----- LiteLLM log (tail) -----"; tail -n 50 /tmp/litellm.log || true + + - name: Post review to PR + uses: actions/github-script@v9 + with: + github-token: ${{ secrets.GITHUB_TOKEN }} + script: | + const fs = require('fs'); + const prNumber = parseInt('${{ 
steps.pr.outputs.number }}', 10); + const commitSha = '${{ steps.pr.outputs.head_sha }}'; + + let result; + try { + result = JSON.parse(fs.readFileSync('/tmp/ocr-result.json', 'utf8')); + } catch (e) { + const stderr = (() => { try { return fs.readFileSync('/tmp/ocr-stderr.log','utf8').trim(); } catch { return ''; } })(); + await github.rest.issues.createComment({ + owner: context.repo.owner, repo: context.repo.repo, issue_number: prNumber, + body: `⚠️ **OCR** could not produce a review.\n\n\`\`\`\n${(stderr || e.message).slice(0, 8000)}\n\`\`\``, + }); + return; + } + + const comments = result.comments || []; + const warnings = result.warnings || []; + + const formatComment = (c) => { + let body = c.content || ''; + if (c.suggestion_code && c.existing_code) { + body += '\n\n```suggestion\n' + c.suggestion_code + (c.suggestion_code.endsWith('\n') ? '' : '\n') + '```'; + } + return body; + }; + const formatMarkdown = (c) => { + let md = `### 📄 \`${c.path}\``; + if (c.start_line && c.end_line) md += ` (L${c.start_line}-L${c.end_line})`; + md += '\n\n' + (c.content || ''); + if (c.suggestion_code && c.existing_code) { + md += '\n\n
💡 Suggested change\n\n'; + md += '**Before:**\n```\n' + c.existing_code + '\n```\n\n**After:**\n```\n' + c.suggestion_code + '\n```\n\n
'; + } + return md; + }; + + if (comments.length === 0) { + await github.rest.issues.createComment({ + owner: context.repo.owner, repo: context.repo.repo, issue_number: prNumber, + body: `✅ **OCR**: ${result.message || 'No issues found.'}`, + }); + return; + } + + const inline = []; + const noLine = []; + for (const c of comments) { + const body = formatComment(c); + const hasLine = (c.start_line >= 1) || (c.end_line >= 1); + if (!hasLine) { noLine.push(c); continue; } + const rc = { path: c.path, body, side: 'RIGHT' }; + if (c.start_line >= 1 && c.end_line >= 1 && c.start_line !== c.end_line) { + rc.start_line = c.start_line; rc.line = c.end_line; rc.start_side = 'RIGHT'; + } else { + rc.line = c.end_line >= 1 ? c.end_line : c.start_line; + } + inline.push(rc); + } + + let summary = `🔍 **OCR** found **${comments.length}** issue(s).`; + summary += `\n- ${inline.length} inline, ${noLine.length} in summary`; + if (warnings.length) summary += `\n- ⚠️ ${warnings.length} warning(s) during review`; + for (const c of noLine) summary += '\n\n---\n\n' + formatMarkdown(c); + + try { + await github.rest.pulls.createReview({ + owner: context.repo.owner, repo: context.repo.repo, pull_number: prNumber, + commit_id: commitSha, body: summary, event: 'COMMENT', comments: inline, + }); + } catch (e) { + // Fallback: a couple of comments may have bad positions; post them individually. + let ok = 0; const failed = []; + for (const rc of inline) { + try { + await github.rest.pulls.createReview({ + owner: context.repo.owner, repo: context.repo.repo, pull_number: prNumber, + commit_id: commitSha, body: '', event: 'COMMENT', comments: [rc], + }); + ok++; + } catch (inner) { failed.push(`\`${rc.path}\`: ${inner.message}`); } + } + let body = summary + `\n\n---\n📊 Posted ${ok}/${inline.length} inline comment(s).`; + if (failed.length) body += '\n\n
Failed\n\n' + failed.join('\n') + '\n
'; + await github.rest.issues.createComment({ + owner: context.repo.owner, repo: context.repo.repo, issue_number: prNumber, body, + }); + } + + # Companion job: OCR can't call MCP, so this separate agent ties the PR's + # changes to PostgreSQL git + pgsql-hackers history via the Agora MCP server + # (pg.ddx.io) and posts a single, upserted "history & discussion" comment. + pg-history: + runs-on: ubuntu-latest + if: | + github.event_name == 'pull_request' || + github.event_name == 'workflow_dispatch' || + (github.event_name == 'issue_comment' && github.event.issue.pull_request && + (startsWith(github.event.comment.body, '/open-code-review') || + startsWith(github.event.comment.body, '@open-code-review') || + startsWith(github.event.comment.body, '/pg-history'))) + steps: + - name: Resolve PR context + id: pr + uses: actions/github-script@v9 + with: + script: | + let prNumber; + if (context.eventName === 'pull_request') prNumber = context.payload.pull_request.number; + else if (context.eventName === 'issue_comment') prNumber = context.issue.number; + else prNumber = parseInt('${{ github.event.inputs.pr_number }}', 10); + const { data: pr } = await github.rest.pulls.get({ + owner: context.repo.owner, repo: context.repo.repo, pull_number: prNumber }); + core.setOutput('number', String(prNumber)); + core.setOutput('base_ref', pr.base.ref); + core.setOutput('head_sha', pr.head.sha); + core.setOutput('title', pr.title || ''); + + - name: Checkout + uses: actions/checkout@v6 + with: + fetch-depth: 0 + + - name: Make base/head refs available + env: + BASE_REF: ${{ steps.pr.outputs.base_ref }} + HEAD_SHA: ${{ steps.pr.outputs.head_sha }} + run: | + git fetch --no-tags origin "+refs/heads/${BASE_REF}:refs/remotes/origin/${BASE_REF}" || true + git fetch --no-tags origin "${HEAD_SHA}" || true + + - name: Setup Python + uses: actions/setup-python@v6 + with: + python-version: '3.12' + + - name: Configure AWS credentials (OIDC) + uses: aws-actions/configure-aws-credentials@v6 + 
with: + role-to-assume: ${{ vars.AWS_ROLE_ARN }} + aws-region: ${{ vars.AWS_REGION }} + role-session-name: pg-history-${{ github.run_id }} + + - name: Install deps + run: pip install boto3 + + - name: Run pg-history (Agora MCP) + env: + PG_HISTORY_MODEL: ${{ vars.OCR_BEDROCK_MODEL }} + AWS_REGION: ${{ vars.AWS_REGION }} + BASE_REF: ${{ steps.pr.outputs.base_ref }} + HEAD_SHA: ${{ steps.pr.outputs.head_sha }} + GH_PR_TITLE: ${{ steps.pr.outputs.title }} + PG_HISTORY_OUT: ${{ runner.temp }}/pg-history.md + run: | + python .github/ocr/pg-history.py || true + echo "----- output -----"; cat "${{ runner.temp }}/pg-history.md" 2>/dev/null || echo "(no output)" + + - name: Upsert PR comment + uses: actions/github-script@v9 + with: + script: | + const fs = require('fs'); + const path = process.env.RUNNER_TEMP + '/pg-history.md'; + let body = ''; + try { body = fs.readFileSync(path, 'utf8').trim(); } catch (e) {} + if (!body) { console.log('pg-history: empty output, nothing to post'); return; } + const prNumber = parseInt('${{ steps.pr.outputs.number }}', 10); + const marker = ''; + body = marker + '\n' + body; + const { data: comments } = await github.rest.issues.listComments({ + owner: context.repo.owner, repo: context.repo.repo, issue_number: prNumber, per_page: 100 }); + const mine = comments.find(c => c.user.type === 'Bot' && c.body && c.body.includes(marker)); + if (mine) { + await github.rest.issues.updateComment({ + owner: context.repo.owner, repo: context.repo.repo, comment_id: mine.id, body }); + } else { + await github.rest.issues.createComment({ + owner: context.repo.owner, repo: context.repo.repo, issue_number: prNumber, body }); + } From d82269c5c1e6114b5a29bd7f207038447f7208fe Mon Sep 17 00:00:00 2001 From: Jacob Champion Date: Wed, 13 Aug 2025 10:58:56 -0700 Subject: [PATCH 03/36] Add support for pytest test suites Specify --enable-pytest/-Dpytest=enabled at configure time. This contains no Postgres test logic -- it is just a "vanilla" pytest skeleton. 
This contains a custom pytest plugin to generate TAP output. This plugin is used by the Meson mtest runner, to show relevant information for failed tests. The pytest-tap plugin would have been preferable, but it's now in maintenance mode, and it has problems with accidentally suppressing important collection failures. Co-authored-by: Jelte Fennema-Nio --- .gitignore | 3 + configure | 166 +++++++++++++++++++++++++++++- configure.ac | 24 ++++- meson.build | 100 ++++++++++++++++++ meson_options.txt | 8 +- pyproject.toml | 21 ++++ src/Makefile.global.in | 29 ++++++ src/makefiles/meson.build | 2 + src/test/Makefile | 1 + src/test/meson.build | 1 + src/test/pytest/Makefile | 20 ++++ src/test/pytest/README | 1 + src/test/pytest/meson.build | 15 +++ src/test/pytest/pgtap.py | 197 ++++++++++++++++++++++++++++++++++++ src/tools/testwrap | 6 +- 15 files changed, 588 insertions(+), 6 deletions(-) create mode 100644 pyproject.toml create mode 100644 src/test/pytest/Makefile create mode 100644 src/test/pytest/README create mode 100644 src/test/pytest/meson.build create mode 100644 src/test/pytest/pgtap.py diff --git a/.gitignore b/.gitignore index 4e911395fe3ba..a550ce6194b85 100644 --- a/.gitignore +++ b/.gitignore @@ -31,6 +31,7 @@ win32ver.rc *.exe lib*dll.def lib*.pc +__pycache__/ # Local excludes in root directory /GNUmakefile @@ -43,3 +44,5 @@ lib*.pc /Release/ /tmp_install/ /portlock/ +/.venv/ +/uv.lock diff --git a/configure b/configure index 5f77f3cac29f3..dd53ac60acfaf 100755 --- a/configure +++ b/configure @@ -630,6 +630,8 @@ vpath_build PG_SYSROOT PG_VERSION_NUM LDFLAGS_EX_BE +UV +PYTEST PROVE DBTOEPUB FOP @@ -773,6 +775,7 @@ CFLAGS CC enable_injection_points PG_TEST_EXTRA +enable_pytest enable_tap_tests enable_dtrace DTRACEFLAGS @@ -851,6 +854,7 @@ enable_profiling enable_coverage enable_dtrace enable_tap_tests +enable_pytest enable_injection_points with_blocksize with_segsize @@ -1551,7 +1555,10 @@ Optional Features: --enable-profiling build with profiling enabled 
--enable-coverage build with coverage testing instrumentation --enable-dtrace build with DTrace support - --enable-tap-tests enable TAP tests (requires Perl and IPC::Run) + --enable-tap-tests enable (Perl-based) TAP tests (requires Perl and + IPC::Run) + --enable-pytest enable (Python-based) pytest suites (requires + Python) --enable-injection-points enable injection points (for testing) --enable-depend turn on automatic dependency tracking @@ -3634,7 +3641,7 @@ fi # -# TAP tests +# Test frameworks # @@ -3662,6 +3669,32 @@ fi + +# Check whether --enable-pytest was given. +if test "${enable_pytest+set}" = set; then : + enableval=$enable_pytest; + case $enableval in + yes) + : + ;; + no) + : + ;; + *) + as_fn_error $? "no argument expected for --enable-pytest option" "$LINENO" 5 + ;; + esac + +else + enable_pytest=no + +fi + + + + + + # # Injection points # @@ -19523,6 +19556,135 @@ $as_echo "$modulestderr" >&6; } fi fi +if test "$enable_pytest" = yes; then + if test -z "$PYTEST"; then + for ac_prog in pytest py.test +do + # Extract the first word of "$ac_prog", so it can be a program name with args. +set dummy $ac_prog; ac_word=$2 +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +$as_echo_n "checking for $ac_word... " >&6; } +if ${ac_cv_path_PYTEST+:} false; then : + $as_echo_n "(cached) " >&6 +else + case $PYTEST in + [\\/]* | ?:[\\/]*) + ac_cv_path_PYTEST="$PYTEST" # Let the user override the test with a path. + ;; + *) + as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. 
+ for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then + ac_cv_path_PYTEST="$as_dir/$ac_word$ac_exec_ext" + $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + + ;; +esac +fi +PYTEST=$ac_cv_path_PYTEST +if test -n "$PYTEST"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $PYTEST" >&5 +$as_echo "$PYTEST" >&6; } +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } +fi + + + test -n "$PYTEST" && break +done + +else + # Report the value of PYTEST in configure's output in all cases. + { $as_echo "$as_me:${as_lineno-$LINENO}: checking for PYTEST" >&5 +$as_echo_n "checking for PYTEST... " >&6; } + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $PYTEST" >&5 +$as_echo "$PYTEST" >&6; } +fi + + if test -z "$PYTEST"; then + # If pytest not found, try installing with uv + if test -z "$UV"; then + for ac_prog in uv +do + # Extract the first word of "$ac_prog", so it can be a program name with args. +set dummy $ac_prog; ac_word=$2 +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +$as_echo_n "checking for $ac_word... " >&6; } +if ${ac_cv_path_UV+:} false; then : + $as_echo_n "(cached) " >&6 +else + case $UV in + [\\/]* | ?:[\\/]*) + ac_cv_path_UV="$UV" # Let the user override the test with a path. + ;; + *) + as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. 
+ for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then + ac_cv_path_UV="$as_dir/$ac_word$ac_exec_ext" + $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + + ;; +esac +fi +UV=$ac_cv_path_UV +if test -n "$UV"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $UV" >&5 +$as_echo "$UV" >&6; } +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } +fi + + + test -n "$UV" && break +done + +else + # Report the value of UV in configure's output in all cases. + { $as_echo "$as_me:${as_lineno-$LINENO}: checking for UV" >&5 +$as_echo_n "checking for UV... " >&6; } + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $UV" >&5 +$as_echo "$UV" >&6; } +fi + + if test -n "$UV"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether uv can install pytest dependencies" >&5 +$as_echo_n "checking whether uv can install pytest dependencies... " >&6; } + if "$UV" pip install "$srcdir" >&5 2>&1; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5 +$as_echo "yes" >&6; } + PYTEST="$UV run pytest" + else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } + as_fn_error $? "pytest not found and uv failed to install dependencies" "$LINENO" 5 + fi + else + as_fn_error $? "pytest not found" "$LINENO" 5 + fi + fi +fi + # If compiler will take -Wl,--as-needed (or various platform-specific # spellings thereof) then add that to LDFLAGS. This is much easier than # trying to filter LIBS to the minimum for each executable. 
diff --git a/configure.ac b/configure.ac index 61cee42daa721..a5642b5e872aa 100644 --- a/configure.ac +++ b/configure.ac @@ -226,11 +226,16 @@ AC_SUBST(DTRACEFLAGS)]) AC_SUBST(enable_dtrace) # -# TAP tests +# Test frameworks # PGAC_ARG_BOOL(enable, tap-tests, no, - [enable TAP tests (requires Perl and IPC::Run)]) + [enable (Perl-based) TAP tests (requires Perl and IPC::Run)]) AC_SUBST(enable_tap_tests) + +PGAC_ARG_BOOL(enable, pytest, no, + [enable (Python-based) pytest suites (requires Python)]) +AC_SUBST(enable_pytest) + AC_ARG_VAR(PG_TEST_EXTRA, [enable selected extra tests (overridden at runtime by PG_TEST_EXTRA environment variable)]) @@ -2502,6 +2507,21 @@ if test "$enable_tap_tests" = yes; then fi fi +if test "$enable_pytest" = yes; then + PGAC_PATH_PROGS(PYTEST, [pytest py.test]) + if test -z "$PYTEST"; then + # Try python -m pytest as a fallback + AC_MSG_CHECKING([whether python -m pytest works]) + if "$PYTHON" -m pytest --version >&AS_MESSAGE_LOG_FD 2>&1; then + AC_MSG_RESULT([yes]) + PYTEST="$PYTHON -m pytest" + else + AC_MSG_RESULT([no]) + AC_MSG_ERROR([pytest not found]) + fi + fi +fi + # If compiler will take -Wl,--as-needed (or various platform-specific # spellings thereof) then add that to LDFLAGS. This is much easier than # trying to filter LIBS to the minimum for each executable. diff --git a/meson.build b/meson.build index 568e0e150bfa8..6f94a7b3bf152 100644 --- a/meson.build +++ b/meson.build @@ -1827,6 +1827,47 @@ endif +############################################################### +# Library: pytest +############################################################### + +pytest_enabled = false +pytest_version = '' +pytest_cmd = ['pytest'] # dummy, overwritten when pytest is found +# We also configure the same PYTHONPATH in the pytest settings in +# pyproject.toml, but pytest versions below 8.4 only actually use that +# value after plugin loading. On lower versions pytest will throw an error even +# when just running 'pytest --version'. 
So we need to configure it here too. +# This won't help people manually running pytest outside of meson/make, but we +# expect those to use a recent enough version of pytest anyway (and if not they +# can manually configure PYTHONPATH too). +pytest_env = {'PYTHONPATH': meson.project_source_root() / 'src' / 'test' / 'pytest'} + +pytestopt = get_option('pytest') +if not pytestopt.disabled() + pytest = find_program(get_option('PYTEST'), native: true, required: false) + + if pytest.found() + pytest_enabled = true + pytest_version = run_command(pytest, '--version', env: pytest_env, check: false).stdout().strip().split(' ')[-1] + pytest_cmd = [pytest.full_path()] + else + # Try python -m pytest as a fallback + pytest_check = run_command(python, '-m', 'pytest', '--version', env: pytest_env, check: false) + if pytest_check.returncode() == 0 + pytest_enabled = true + pytest_version = pytest_check.stdout().strip().split(' ')[-1] + pytest_cmd = [python.full_path(), '-m', 'pytest'] + endif + endif + + if not pytest_enabled and pytestopt.enabled() + error('pytest not found') + endif +endif + + + ############################################################### # Library: zstd ############################################################### @@ -4118,6 +4159,64 @@ foreach test_dir : tests ) endforeach install_suites += test_group + elif kind == 'pytest' + testwrap_pytest = testwrap_base + if not pytest_enabled + testwrap_pytest += ['--skip', 'pytest not enabled'] + endif + + test_command = pytest_cmd + + test_command += [ + '-c', meson.project_source_root() / 'pyproject.toml', + '--verbose', + '-p', 'pgtap', # enable our test reporter plugin + '-ra', # show skipped and xfailed tests too + ] + + # Add temporary install, the build directory for non-installed binaries and + # also test/ for non-installed test binaries built separately. 
+ env = test_env + env.prepend('PATH', temp_install_bindir, test_dir['bd'], test_dir['bd'] / 'test') + temp_install_datadir = '@0@@1@'.format(test_install_destdir, dir_prefix / dir_data) + env.set('share_contrib_dir', temp_install_datadir / 'contrib') + env.prepend('PYTHONPATH', pytest_env['PYTHONPATH']) + + foreach name, value : t.get('env', {}) + env.set(name, value) + endforeach + + test_group = test_dir['name'] + test_kwargs = { + 'protocol': 'tap', + 'suite': test_group, + 'timeout': 1000, + 'depends': test_deps + t.get('deps', []), + 'env': env, + } + t.get('test_kwargs', {}) + + foreach onetest : t['tests'] + # Make test names prettier, remove pyt/ and .py + onetest_p = onetest + if onetest_p.startswith('pyt/') + onetest_p = onetest.split('pyt/')[1] + endif + if onetest_p.endswith('.py') + onetest_p = fs.stem(onetest_p) + endif + + test(test_dir['name'] / onetest_p, + python, + kwargs: test_kwargs, + args: testwrap_pytest + [ + '--testgroup', test_dir['name'], + '--testname', onetest_p, + '--', test_command, + test_dir['sd'] / onetest, + ], + ) + endforeach + install_suites += test_group else error('unknown kind @0@ of test in @1@'.format(kind, test_dir['sd'])) endif @@ -4310,6 +4409,7 @@ summary( 'bison': '@0@ @1@'.format(bison.full_path(), bison_version), 'dtrace': dtrace, 'flex': '@0@ @1@'.format(flex.full_path(), flex_version), + 'pytest': pytest_enabled ? 
' '.join(pytest_cmd) + ' ' + pytest_version : not_found_dep, }, section: 'Programs', ) diff --git a/meson_options.txt b/meson_options.txt index 6a793f3e47943..cb4825c35756b 100644 --- a/meson_options.txt +++ b/meson_options.txt @@ -41,7 +41,10 @@ option('cassert', type: 'boolean', value: false, description: 'Enable assertion checks (for debugging)') option('tap_tests', type: 'feature', value: 'auto', - description: 'Enable TAP tests') + description: 'Enable (Perl-based) TAP tests') + +option('pytest', type: 'feature', value: 'auto', + description: 'Enable (Python-based) pytest suites') option('injection_points', type: 'boolean', value: false, description: 'Enable injection points') @@ -195,6 +198,9 @@ option('PERL', type: 'string', value: 'perl', option('PROVE', type: 'string', value: 'prove', description: 'Path to prove binary') +option('PYTEST', type: 'array', value: ['pytest', 'py.test'], + description: 'Path to pytest binary') + option('PYTHON', type: 'array', value: ['python3', 'python'], description: 'Path to python binary') diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000000000..60abb4d06557f --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,21 @@ +[project] +name = "postgresql-hackers-tooling" +version = "0.1.0" +description = "Pytest infrastructure for PostgreSQL" +requires-python = ">=3.6" +dependencies = [ + # pytest 7.0 was the last version which supported Python 3.6, but the BSDs + # have started putting 8.x into ports, so we support both. (pytest 8 can be + # used throughout once we drop support for Python 3.7.) + "pytest >= 7.0, < 10", + + # Any other dependencies are effectively optional (added below). We import + # these libraries using pytest.importorskip(). So tests will be skipped if + # they are not available. +] + +[tool.pytest.ini_options] +minversion = "7.0" + +# Common test code can be found here. 
+pythonpath = ["src/test/pytest"] diff --git a/src/Makefile.global.in b/src/Makefile.global.in index cef1ad7f87d98..53b37c82df55b 100644 --- a/src/Makefile.global.in +++ b/src/Makefile.global.in @@ -211,6 +211,7 @@ enable_dtrace = @enable_dtrace@ enable_coverage = @enable_coverage@ enable_injection_points = @enable_injection_points@ enable_tap_tests = @enable_tap_tests@ +enable_pytest = @enable_pytest@ python_includespec = @python_includespec@ python_libdir = @python_libdir@ @@ -356,6 +357,7 @@ MSGFMT = @MSGFMT@ MSGFMT_FLAGS = @MSGFMT_FLAGS@ MSGMERGE = @MSGMERGE@ OPENSSL = @OPENSSL@ +PYTEST = @PYTEST@ PYTHON = @PYTHON@ TAR = @TAR@ XGETTEXT = @XGETTEXT@ @@ -510,6 +512,33 @@ prove_installcheck = @echo "TAP tests not enabled. Try configuring with --enable prove_check = $(prove_installcheck) endif +ifeq ($(enable_pytest),yes) + +pytest_installcheck = @echo "Installcheck is not currently supported for pytest." + +# We also configure the same PYTHONPATH in the pytest settings in +# pyproject.toml, but pytest versions below 8.4 only actually use that value +# after plugin loading. So we need to configure it here too. This won't help +# people manually running pytest outside of meson/make, but we expect those to +# use a recent enough version of pytest anyway (and if not they can manually +# configure PYTHONPATH too). +define pytest_check +echo "# +++ pytest check in $(subdir) +++" && \ +rm -rf '$(CURDIR)'/tmp_check && \ +$(MKDIR_P) '$(CURDIR)'/tmp_check && \ +cd $(srcdir) && \ + TESTLOGDIR='$(CURDIR)/tmp_check/log' \ + TESTDATADIR='$(CURDIR)/tmp_check' \ + PYTHONPATH='$(abs_top_srcdir)/src/test/pytest:$$PYTHONPATH' \ + $(with_temp_install) \ + $(PYTEST) -c '$(abs_top_srcdir)/pyproject.toml' --verbose -ra ./pyt/ +endef + +else +pytest_installcheck = @echo "pytest is not enabled. Try configuring with --enable-pytest" +pytest_check = $(pytest_installcheck) +endif + # Installation. 
install_bin = @install_bin@ diff --git a/src/makefiles/meson.build b/src/makefiles/meson.build index 2401025d1cd6f..663f49de381ac 100644 --- a/src/makefiles/meson.build +++ b/src/makefiles/meson.build @@ -56,6 +56,8 @@ pgxs_kv = { 'enable_nls': libintl.found() ? 'yes' : 'no', 'enable_injection_points': get_option('injection_points') ? 'yes' : 'no', 'enable_tap_tests': tap_tests_enabled ? 'yes' : 'no', + 'enable_pytest': pytest_enabled ? 'yes' : 'no', + 'PYTEST': pytest_enabled ? ' '.join(pytest_cmd) : '', 'enable_debug': get_option('debug') ? 'yes' : 'no', 'enable_coverage': 'no', 'enable_dtrace': dtrace.found() ? 'yes' : 'no', diff --git a/src/test/Makefile b/src/test/Makefile index 3eb0a06abb46e..0be9771d71f5f 100644 --- a/src/test/Makefile +++ b/src/test/Makefile @@ -18,6 +18,7 @@ SUBDIRS = \ modules \ perl \ postmaster \ + pytest \ recovery \ regress \ subscription diff --git a/src/test/meson.build b/src/test/meson.build index cd45cbf57fb0f..09175f0eaea4e 100644 --- a/src/test/meson.build +++ b/src/test/meson.build @@ -5,6 +5,7 @@ subdir('isolation') subdir('authentication') subdir('postmaster') +subdir('pytest') subdir('recovery') subdir('subscription') subdir('modules') diff --git a/src/test/pytest/Makefile b/src/test/pytest/Makefile new file mode 100644 index 0000000000000..2bdca96ccbee3 --- /dev/null +++ b/src/test/pytest/Makefile @@ -0,0 +1,20 @@ +#------------------------------------------------------------------------- +# +# Makefile for pytest +# +# Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group +# Portions Copyright (c) 1994, Regents of the University of California +# +# src/test/pytest/Makefile +# +#------------------------------------------------------------------------- + +subdir = src/test/pytest +top_builddir = ../../.. 
+include $(top_builddir)/src/Makefile.global + +check: + $(pytest_check) + +clean distclean maintainer-clean: + rm -rf tmp_check diff --git a/src/test/pytest/README b/src/test/pytest/README new file mode 100644 index 0000000000000..1333ed77b7e1e --- /dev/null +++ b/src/test/pytest/README @@ -0,0 +1 @@ +TODO diff --git a/src/test/pytest/meson.build b/src/test/pytest/meson.build new file mode 100644 index 0000000000000..b1f6061b3079f --- /dev/null +++ b/src/test/pytest/meson.build @@ -0,0 +1,15 @@ +# Copyright (c) 2025, PostgreSQL Global Development Group + +if not pytest_enabled + subdir_done() +endif + +tests += { + 'name': 'pytest', + 'sd': meson.current_source_dir(), + 'bd': meson.current_build_dir(), + 'pytest': { + 'tests': [ + ], + }, +} diff --git a/src/test/pytest/pgtap.py b/src/test/pytest/pgtap.py new file mode 100644 index 0000000000000..2ae16b624d571 --- /dev/null +++ b/src/test/pytest/pgtap.py @@ -0,0 +1,197 @@ +# Copyright (c) 2025, PostgreSQL Global Development Group + +import os +import sys + +import pytest + +# +# Helpers +# + + +class TAP: + """ + A basic API for reporting via the TAP 12 protocol. + + https://testanything.org/tap-specification.html + """ + + def __init__(self): + self.count = 0 + + def expect(self, num: int): + self.print(f"1..{num}") + + def print(self, *args): + print(*args, file=sys.__stdout__) + + def ok(self, name: str): + self.count += 1 + self.print("ok", self.count, "-", name) + + def skip(self, name: str, reason: str): + self.count += 1 + self.print("ok", self.count, "-", name, "# skip", reason) + + def fail(self, name: str, details: str): + self.count += 1 + self.print("not ok", self.count, "-", name) + + # mtest has some odd behavior around TAP tests where it won't print + # diagnostics on failure if they're part of the stdout stream, so we + # might as well just dump the details directly to stderr instead. + print(details, file=sys.__stderr__) + + +tap = TAP() + + +class TestNotes: + """ + Annotations for a single test. 
The existing pytest hooks keep interesting + information somewhat separated across the different stages + (setup/test/teardown), so this class is used to correlate them. + """ + + skipped = False + skip_reason = None + + failed = False + details = "" + + +# Register a custom key in the stash dictionary for keeping our TestNotes. +notes_key = pytest.StashKey[TestNotes]() + + +# +# Hook Implementations +# + + +@pytest.hookimpl(tryfirst=True) +def pytest_configure(config): + """ + Hijacks the standard streams as soon as possible during pytest startup. The + pytest-formatted output gets logged to file instead, and we'll use the + original sys.__stdout__/__stderr__ streams for the TAP protocol. + """ + logdir = os.getenv("TESTLOGDIR") + if not logdir: + raise RuntimeError("pgtap requires the TESTLOGDIR envvar to be set") + + os.makedirs(logdir) + logpath = os.path.join(logdir, "pytest.log") + sys.stdout = sys.stderr = open(logpath, "a", buffering=1) + + +@pytest.hookimpl(trylast=True) +def pytest_sessionfinish(session, exitstatus): + """ + Suppresses nonzero exit codes due to failed tests. (In that case, we want + Meson to report a failure count, not a generic ERROR.) + """ + if exitstatus == pytest.ExitCode.TESTS_FAILED: + session.exitstatus = pytest.ExitCode.OK + + +@pytest.hookimpl +def pytest_collectreport(report): + # Include collection failures directly in Meson error output. + if report.failed: + print(report.longreprtext, file=sys.__stderr__) + + +@pytest.hookimpl +def pytest_internalerror(excrepr, excinfo): + # Include internal errors directly in Meson error output. + print(excrepr, file=sys.__stderr__) + + +# +# Hook Wrappers +# +# In pytest parlance, a "wrapper" for a hook can inspect and optionally modify +# existing hooks' behavior, but it does not replace the hook chain. This is done +# through a generator-style API which chains the hooks together (see the use of +# `yield`). 
+# + + +@pytest.hookimpl(hookwrapper=True) +def pytest_collection(session): + """Reports the number of gathered tests after collection is finished.""" + res = yield + tap.expect(session.testscollected) + return res + + +@pytest.hookimpl(hookwrapper=True) +def pytest_runtest_makereport(item, call): + """ + Annotates a test item with our TestNotes and grabs relevant information for + reporting. + + This is called multiple times per test, so it's not correct to print the TAP + result here. (A test and its teardown stage can both fail, and we want to + see the details for both.) We instead combine all the information for use by + our pytest_runtest_protocol wrapper later on. + """ + res = yield + + if notes_key not in item.stash: + item.stash[notes_key] = TestNotes() + notes = item.stash[notes_key] + + report = res.get_result() + if report.passed: + pass # no annotation needed + + elif report.skipped: + notes.skipped = True + _, _, notes.skip_reason = report.longrepr + + elif report.failed: + notes.failed = True + + if not notes.details: + notes.details += "{:_^72}\n\n".format(f" {report.head_line} ") + + if report.when in ("setup", "teardown"): + notes.details += "\n{:_^72}\n\n".format( + f" Error during {report.when} of {report.head_line} " + ) + + notes.details += report.longreprtext + "\n" + + # Include captured stdout/stderr/log in failure output + for section_name, section_content in report.sections: + if section_content.strip(): + notes.details += "\n{:-^72}\n".format(f" {section_name} ") + notes.details += section_content + "\n" + + else: + raise RuntimeError("pytest_runtest_makereport received unknown test status") + + return res + + +@pytest.hookimpl(hookwrapper=True) +def pytest_runtest_protocol(item, nextitem): + """ + Reports the TAP result for this test item using our gathered TestNotes. + """ + res = yield + + assert notes_key in item.stash, "pgtap didn't annotate a test item?" 
+ notes = item.stash[notes_key] + + if notes.failed: + tap.fail(item.nodeid, notes.details) + elif notes.skipped: + tap.skip(item.nodeid, notes.skip_reason) + else: + tap.ok(item.nodeid) + + return res diff --git a/src/tools/testwrap b/src/tools/testwrap index e91296ecd1531..346f86b8ea361 100755 --- a/src/tools/testwrap +++ b/src/tools/testwrap @@ -42,7 +42,11 @@ open(os.path.join(testdir, 'test.start'), 'x') env_dict = {**os.environ, 'TESTDATADIR': os.path.join(testdir, 'data'), - 'TESTLOGDIR': os.path.join(testdir, 'log')} + 'TESTLOGDIR': os.path.join(testdir, 'log'), + # Prevent emitting terminal capability sequences that pollute the + # TAP output stream (e.g. \033[?1034h). This happens on OpenBSD with + # pytest for unknown reasons. + 'TERM': ''} # The configuration time value of PG_TEST_EXTRA is supplied via argument From 1317672c648ec86421082429ad0031150e1e2e68 Mon Sep 17 00:00:00 2001 From: Jelte Fennema-Nio Date: Tue, 16 Dec 2025 09:25:48 +0100 Subject: [PATCH 04/36] Add pytest infrastructure to interact with PostgreSQL servers This adds functionality to the pytest infrastructure that allows tests to do common things with PostgreSQL servers like: - creating - starting - stopping - connecting - running queries - handling errors The goal of this infrastructure is to be so easy to use that the actual tests really only contain the logic to test the behaviour that the tests are testing, as opposed to a bunch of boilerplate. Examples of this are: Types get converted to their Python counterparts automatically. Errors become actual Python exceptions. Results of queries that only return a single row or cell are unpacked automatically, so you don't have to do rows[0][0] if the query only returns a single cell. The only new tests that are part of this commit are tests that cover this testing infrastructure itself. It's debatable whether such tests are useful long term, because any infrastructure that's unused by actual tests should probably not exist.
For now it seems good to test this basic functionality though, both to make sure we don't break it before committing actual tests that use it, and also as an example for people writing new tests. --- doc/src/sgml/regress.sgml | 66 ++- pyproject.toml | 3 + src/test/pytest/README | 154 ++++++- src/test/pytest/libpq/__init__.py | 35 ++ src/test/pytest/libpq/_core.py | 488 ++++++++++++++++++++++ src/test/pytest/libpq/errors.py | 62 +++ src/test/pytest/meson.build | 4 + src/test/pytest/pypg/__init__.py | 10 + src/test/pytest/pypg/_env.py | 72 ++++ src/test/pytest/pypg/fixtures.py | 356 ++++++++++++++++ src/test/pytest/pypg/server.py | 482 +++++++++++++++++++++ src/test/pytest/pypg/util.py | 55 +++ src/test/pytest/pyt/conftest.py | 1 + src/test/pytest/pyt/test_errors.py | 34 ++ src/test/pytest/pyt/test_libpq.py | 35 ++ src/test/pytest/pyt/test_multi_server.py | 46 ++ src/test/pytest/pyt/test_query_helpers.py | 347 +++++++++++++++ 17 files changed, 2248 insertions(+), 2 deletions(-) create mode 100644 src/test/pytest/libpq/__init__.py create mode 100644 src/test/pytest/libpq/_core.py create mode 100644 src/test/pytest/libpq/errors.py create mode 100644 src/test/pytest/pypg/__init__.py create mode 100644 src/test/pytest/pypg/_env.py create mode 100644 src/test/pytest/pypg/fixtures.py create mode 100644 src/test/pytest/pypg/server.py create mode 100644 src/test/pytest/pypg/util.py create mode 100644 src/test/pytest/pyt/conftest.py create mode 100644 src/test/pytest/pyt/test_errors.py create mode 100644 src/test/pytest/pyt/test_libpq.py create mode 100644 src/test/pytest/pyt/test_multi_server.py create mode 100644 src/test/pytest/pyt/test_query_helpers.py diff --git a/doc/src/sgml/regress.sgml b/doc/src/sgml/regress.sgml index c74941bfbf20a..a3a9d55e4f33a 100644 --- a/doc/src/sgml/regress.sgml +++ b/doc/src/sgml/regress.sgml @@ -928,7 +928,7 @@ float4:out:.*-.*-cygwin.*=float4-misrounded-input.out - TAP Tests + Perl TAP Tests Various tests, particularly the client program 
tests @@ -1017,6 +1017,70 @@ PG_TEST_NOCLEAN=1 make -C src/bin/pg_dump check + + Pytest Tests + + + Tests in pyt directories use the Python + pytest framework. These tests provide a + convenient way to test libpq client functionality and scenarios requiring + multiple PostgreSQL server instances. + + + + The pytest tests require PostgreSQL to be + configured with the --enable-pytest option (or + -Dpytest=enabled for Meson builds). You also need + pytest installed. You can either install it + system-wide, or create a virtual environment in the source directory: + +python -m venv .venv +source .venv/bin/activate +pip install . + + Alternatively, if you have uv installed: + +uv sync +source .venv/bin/activate + + Remember to activate the virtual environment before running + configure or meson setup. + + + + With Meson builds, you can run the pytest tests using: + +meson test --suite pytest + + With autoconf-based builds, you can run them from the + src/test/pytest directory using: + +make check + + + + + You can also run specific test files directly using pytest: + +pytest src/test/pytest/pyt/test_libpq.py +pytest -k "test_connstr" + + + + + Many operations in the test suites use a 180-second timeout, which on slow + hosts may lead to load-induced timeouts. Setting the environment variable + PG_TEST_TIMEOUT_DEFAULT to a higher number will change + the default to avoid this. + + + + For more information on writing pytest tests, see the + src/test/pytest/README file. + + + + Test Coverage Examination diff --git a/pyproject.toml b/pyproject.toml index 60abb4d06557f..4628d2274e010 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -19,3 +19,6 @@ minversion = "7.0" # Common test code can be found here.
pythonpath = ["src/test/pytest"] + +# Load the shared fixtures plugin +addopts = ["-p", "pypg.fixtures"] diff --git a/src/test/pytest/README b/src/test/pytest/README index 1333ed77b7e1e..bb75e56a25dad 100644 --- a/src/test/pytest/README +++ b/src/test/pytest/README @@ -1 +1,153 @@ -TODO +src/test/pytest/README + +Pytest-based tests +================== + +This directory contains infrastructure for Python-based tests using pytest, +along with some core tests for the pytest infrastructure itself. The framework +provides fixtures for managing PostgreSQL server instances and connecting to +them via libpq. + + +Running the tests +================= + +NOTE: You must have given the --enable-pytest argument to configure (or +-Dpytest=enabled for Meson builds). You also need to have pytest installed. + +If you don't have pytest installed system-wide, you can create a virtual +environment: + + python3 -m venv .venv + source .venv/bin/activate # On Windows: .venv\Scripts\activate + pip install . # Installs pytest and other dependencies + +Or using uv (https://docs.astral.sh/uv/): + + uv sync + source .venv/bin/activate # On Windows: .venv\Scripts\activate + +Remember to activate the virtual environment before running configure/meson +setup. + +With Meson builds, you can run: + meson test --suite pytest + +With autoconf based builds, you can run: + make check +or + make installcheck + +You can run specific test files and/or use pytest's -k option to select tests: + pytest src/test/pytest/pyt/test_libpq.py + pytest -k "test_connstr" + + +Directory structure +=================== + +pypg/ + Python library providing common functions and pytest fixtures that can be + used in tests. + +libpq/ + A simple but user-friendly python wrapper around libpq + +pyt/ + Tests for the pytest infrastructure itself + +pgtap.py + A pytest plugin to output results in TAP format + + +Writing tests +============= + +Tests use pytest fixtures to manage server instances and connections. 
The +most commonly used fixtures are: + +pg + A PostgresServer instance configured for the current test. Use this for + creating test users/databases or modifying server configuration. Changes + are automatically rolled back after the test. + +conn + A connected PGconn instance to the test server. Automatically cleaned up + after the test. + +connect + A function to create additional connections with custom options. + +create_pg + A factory function to create additional PostgreSQL servers within a test. + Servers are automatically cleaned up at the end of the test. Useful for + testing scenarios that require multiple independent servers. + +create_pg_module + Like create_pg, but servers persist for the entire test module. Use this + when multiple tests in a module can share the same servers, which is + faster than creating new servers for each test. + + +Example test: + + def test_simple_query(conn): + result = conn.sql("SELECT 1 + 1") + assert result == 2 + + def test_with_user(pg): + users = pg.create_users("test") + with pg.reloading() as s: + s.hba.prepend(["local", "all", users["test"], "trust"]) + + conn = pg.connect(user=users["test"]) + assert conn.sql("SELECT current_user") == users["test"] + + def test_multiple_servers(create_pg): + node1 = create_pg("primary") + node2 = create_pg("secondary") + + conn1 = node1.connect() + conn2 = node2.connect() + + # Each server is independent + assert node1.port != node2.port + + +Server configuration +==================== + +Tests can temporarily modify server configuration using context managers: + + with pg.reloading() as s: + s.conf.set(log_connections="on") + s.hba.prepend("local all all trust") + # Server is reloaded here + # After the test finished the original configuration is restored and + # the server is reloaded again + +Use pg.restarting() instead if the configuration change requires a restart. 
+ + +Timeouts +======== + +Tests inherit the PG_TEST_TIMEOUT_DEFAULT environment variable (defaulting +to 180 seconds). The remaining_timeout fixture provides a function that +returns how much time remains for the current test. + + +Environment variables +===================== + +PG_TEST_TIMEOUT_DEFAULT + Per-test timeout in seconds (default: 180) + +PG_CONFIG + Path to pg_config (default: uses PATH) + +TESTDATADIR + Directory for test data (default: pytest temp directory) + +PG_TEST_EXTRA + Space-separated list of optional test categories to run (e.g., "ssl") diff --git a/src/test/pytest/libpq/__init__.py b/src/test/pytest/libpq/__init__.py new file mode 100644 index 0000000000000..6a71ebbe43f03 --- /dev/null +++ b/src/test/pytest/libpq/__init__.py @@ -0,0 +1,35 @@ +# Copyright (c) 2025, PostgreSQL Global Development Group + +""" +libpq testing utilities - ctypes bindings and helpers for PostgreSQL's libpq library. + +This module provides Python wrappers around libpq for use in pytest tests. +""" + +from . import errors +from .errors import LibpqError +from ._core import ( + ConnectionStatus, + DiagField, + ExecStatus, + PGconn, + PGresult, + connect, + connstr, + load_libpq_handle, + register_type_info, +) + +__all__ = [ + "errors", + "LibpqError", + "ConnectionStatus", + "DiagField", + "ExecStatus", + "PGconn", + "PGresult", + "connect", + "connstr", + "load_libpq_handle", + "register_type_info", +] diff --git a/src/test/pytest/libpq/_core.py b/src/test/pytest/libpq/_core.py new file mode 100644 index 0000000000000..1c059b9b44657 --- /dev/null +++ b/src/test/pytest/libpq/_core.py @@ -0,0 +1,488 @@ +# Copyright (c) 2025, PostgreSQL Global Development Group + +""" +Core libpq functionality - ctypes bindings and connection handling. 
+""" + +import contextlib +import ctypes +import datetime +import decimal +import enum +import json +import platform +import os +import uuid +from typing import Any, Callable, Dict, Optional + +from .errors import LibpqError + + +# PG_DIAG field identifiers from postgres_ext.h +class DiagField(enum.IntEnum): + SEVERITY = ord("S") + SEVERITY_NONLOCALIZED = ord("V") + SQLSTATE = ord("C") + MESSAGE_PRIMARY = ord("M") + MESSAGE_DETAIL = ord("D") + MESSAGE_HINT = ord("H") + STATEMENT_POSITION = ord("P") + INTERNAL_POSITION = ord("p") + INTERNAL_QUERY = ord("q") + CONTEXT = ord("W") + SCHEMA_NAME = ord("s") + TABLE_NAME = ord("t") + COLUMN_NAME = ord("c") + DATATYPE_NAME = ord("d") + CONSTRAINT_NAME = ord("n") + SOURCE_FILE = ord("F") + SOURCE_LINE = ord("L") + SOURCE_FUNCTION = ord("R") + + +class ConnectionStatus(enum.IntEnum): + """PostgreSQL connection status codes from libpq.""" + + CONNECTION_OK = 0 + CONNECTION_BAD = 1 + + +class ExecStatus(enum.IntEnum): + """PostgreSQL result status codes from PQresultStatus.""" + + PGRES_EMPTY_QUERY = 0 + PGRES_COMMAND_OK = 1 + PGRES_TUPLES_OK = 2 + PGRES_COPY_OUT = 3 + PGRES_COPY_IN = 4 + PGRES_BAD_RESPONSE = 5 + PGRES_NONFATAL_ERROR = 6 + PGRES_FATAL_ERROR = 7 + PGRES_COPY_BOTH = 8 + PGRES_SINGLE_TUPLE = 9 + PGRES_PIPELINE_SYNC = 10 + PGRES_PIPELINE_ABORTED = 11 + + +class _PGconn(ctypes.Structure): + pass + + +class _PGresult(ctypes.Structure): + pass + + +_PGconn_p = ctypes.POINTER(_PGconn) +_PGresult_p = ctypes.POINTER(_PGresult) + + +def load_libpq_handle(libdir, bindir): + """ + Loads a ctypes handle for libpq. Some common function prototypes are + initialized for general use. 
+ """ + system = platform.system() + + if system in ("Linux", "FreeBSD", "NetBSD", "OpenBSD"): + name = "libpq.so.5" + elif system == "Darwin": + name = "libpq.5.dylib" + elif system == "Windows": + name = "libpq.dll" + else: + assert False, f"the libpq fixture must be updated for {system}" + + if system == "Windows": + # On Windows, libpq.dll is confusingly in bindir, not libdir. And we + # need to add this directory the the search path. + libpq_path = os.path.join(bindir, name) + lib = ctypes.CDLL(libpq_path) + else: + libpq_path = os.path.join(libdir, name) + lib = ctypes.CDLL(libpq_path) + + # + # Function Prototypes + # + + lib.PQconnectdb.restype = _PGconn_p + lib.PQconnectdb.argtypes = [ctypes.c_char_p] + + lib.PQstatus.restype = ctypes.c_int + lib.PQstatus.argtypes = [_PGconn_p] + + lib.PQexec.restype = _PGresult_p + lib.PQexec.argtypes = [_PGconn_p, ctypes.c_char_p] + + lib.PQresultStatus.restype = ctypes.c_int + lib.PQresultStatus.argtypes = [_PGresult_p] + + lib.PQclear.restype = None + lib.PQclear.argtypes = [_PGresult_p] + + lib.PQerrorMessage.restype = ctypes.c_char_p + lib.PQerrorMessage.argtypes = [_PGconn_p] + + lib.PQfinish.restype = None + lib.PQfinish.argtypes = [_PGconn_p] + + lib.PQresultErrorMessage.restype = ctypes.c_char_p + lib.PQresultErrorMessage.argtypes = [_PGresult_p] + + lib.PQntuples.restype = ctypes.c_int + lib.PQntuples.argtypes = [_PGresult_p] + + lib.PQnfields.restype = ctypes.c_int + lib.PQnfields.argtypes = [_PGresult_p] + + lib.PQgetvalue.restype = ctypes.c_char_p + lib.PQgetvalue.argtypes = [_PGresult_p, ctypes.c_int, ctypes.c_int] + + lib.PQgetisnull.restype = ctypes.c_int + lib.PQgetisnull.argtypes = [_PGresult_p, ctypes.c_int, ctypes.c_int] + + lib.PQftype.restype = ctypes.c_uint + lib.PQftype.argtypes = [_PGresult_p, ctypes.c_int] + + lib.PQresultErrorField.restype = ctypes.c_char_p + lib.PQresultErrorField.argtypes = [_PGresult_p, ctypes.c_int] + + return lib + + +# PostgreSQL type OIDs and conversion system +# Type 
registry - maps OID to converter function +_type_converters: Dict[int, Callable[[str], Any]] = {} +_array_to_elem_map: Dict[int, int] = {} + + +def register_type_info( + name: str, oid: int, array_oid: int, converter: Callable[[str], Any] +): + """ + Register a PostgreSQL type with its OID, array OID, and conversion function. + + Usage: + register_type_info("bool", 16, 1000, lambda v: v == "t") + """ + _type_converters[oid] = converter + if array_oid is not None: + _array_to_elem_map[array_oid] = oid + + +def _parse_array(value: str, elem_oid: int): + """Parse PostgreSQL array syntax into nested Python lists.""" + stack: list[list] = [] + current_element: list[str] = [] + in_quotes = False + was_quoted = False + pos = 0 + + while pos < len(value): + char = value[pos] + + if in_quotes: + if char == "\\": + next_char = value[pos + 1] + if next_char not in '"\\': + raise NotImplementedError('Only \\" and \\\\ escapes are supported') + current_element.append(next_char) + pos += 2 + continue + elif char == '"': + in_quotes = False + else: + current_element.append(char) + elif char == '"': + in_quotes = True + was_quoted = True + elif char == "{": + stack.append([]) + elif char in ",}": + if current_element or was_quoted: + elem = "".join(current_element) + if not was_quoted and elem == "NULL": + stack[-1].append(None) + else: + stack[-1].append(_convert_pg_value(elem, elem_oid)) + current_element = [] + was_quoted = False + if char == "}": + completed = stack.pop() + if not stack: + return completed + stack[-1].append(completed) + elif char != " ": + current_element.append(char) + pos += 1 + + raise ValueError(f"Malformed array literal: {value}") + + +# Register standard PostgreSQL types that we'll likely encounter in tests +register_type_info("bool", 16, 1000, lambda v: v == "t") +register_type_info("int2", 21, 1005, int) +register_type_info("int4", 23, 1007, int) +register_type_info("int8", 20, 1016, int) +register_type_info("float4", 700, 1021, float) 
+register_type_info("float8", 701, 1022, float) +register_type_info("numeric", 1700, 1231, decimal.Decimal) +register_type_info("text", 25, 1009, str) +register_type_info("varchar", 1043, 1015, str) +register_type_info("date", 1082, 1182, datetime.date.fromisoformat) +register_type_info("time", 1083, 1183, datetime.time.fromisoformat) +register_type_info("timestamp", 1114, 1115, datetime.datetime.fromisoformat) +register_type_info("timestamptz", 1184, 1185, datetime.datetime.fromisoformat) +register_type_info("uuid", 2950, 2951, uuid.UUID) +register_type_info("json", 114, 199, json.loads) +register_type_info("jsonb", 3802, 3807, json.loads) + + +def _convert_pg_value(value: str, type_oid: int) -> Any: + """ + Convert PostgreSQL string value to appropriate Python type based on OID. + Uses the registered type converters from register_type_info(). + """ + # Check if it's an array type + if type_oid in _array_to_elem_map: + elem_oid = _array_to_elem_map[type_oid] + return _parse_array(value, elem_oid) + + # Use registered converter if available + converter = _type_converters.get(type_oid) + if converter: + return converter(value) + + # Unknown types - return as string + return value + + +def simplify_query_results(results) -> Any: + """ + Simplify the results of a query so that the caller doesn't have to unpack + lists and tuples of length 1. 
+ """ + if len(results) == 1: + row = results[0] + if len(row) == 1: + # If there's only a single cell, just return the value + return row[0] + # If there's only a single row, just return that row + return row + + if len(results) != 0 and len(results[0]) == 1: + # If there's only a single column, return an array of values + return [row[0] for row in results] + + # if there are multiple rows and columns, return the results as is + return results + + +class PGresult(contextlib.AbstractContextManager): + """Wraps a raw _PGresult_p with a more friendly interface.""" + + def __init__(self, lib: ctypes.CDLL, res: _PGresult_p): + self._lib = lib + self._res = res + + def __exit__(self, *exc): + self._lib.PQclear(self._res) + self._res = None + + def status(self) -> ExecStatus: + return ExecStatus(self._lib.PQresultStatus(self._res)) + + def error_message(self): + """Returns the error message associated with this result.""" + msg = self._lib.PQresultErrorMessage(self._res) + return msg.decode() if msg else "" + + def _get_error_field(self, field: DiagField) -> Optional[str]: + """Get an error field from the result using PQresultErrorField.""" + val = self._lib.PQresultErrorField(self._res, int(field)) + return val.decode() if val else None + + def raise_error(self) -> None: + """ + Raises LibpqError with diagnostic information from the result. 
def fetch_all(self):
    """
    Fetch every row of the result and convert the cells to Python types.

    Returns a list with one tuple per row. NULL cells become None; all other
    cells are decoded and passed through _convert_pg_value() together with
    the type OID of their column.
    """
    lib = self._lib
    res = self._res

    row_count = lib.PQntuples(res)
    column_count = lib.PQnfields(res)

    # One type OID per column, looked up once rather than once per cell
    column_oids = [lib.PQftype(res, c) for c in range(column_count)]

    def cell(r, c):
        # NULL is checked first, before the cell value is fetched
        if lib.PQgetisnull(res, r, c):
            return None
        text = lib.PQgetvalue(res, r, c).decode()
        return _convert_pg_value(text, column_oids[c])

    return [
        tuple(cell(r, c) for c in range(column_count)) for r in range(row_count)
    ]
def connstr(opts: Dict[str, Any]) -> str:
    """
    Flattens the provided options into a libpq connection string. Values
    are converted to str and quoted/escaped as necessary.

    Args:
        opts: Mapping of connection keyword to value. Entries are emitted in
            the mapping's iteration order.

    Returns:
        Space-separated "keyword=value" pairs. Empty values are written as
        ''. Backslashes and single quotes are backslash-escaped. Values
        containing any whitespace are wrapped in single quotes.
    """
    settings = []

    for key, value in opts.items():
        text = str(value)
        if not text:
            # An empty value has to be spelled out as an empty quoted string
            text = "''"
        else:
            # Escape backslashes first so that the backslashes added for
            # quotes are not themselves doubled.
            text = text.replace("\\", "\\\\").replace("'", "\\'")

            # An unquoted value ends at the first whitespace character of any
            # kind (tab, newline, ...), not just at a space, so quote on all
            # of them.
            if any(ch.isspace() for ch in text):
                text = f"'{text}'"

        settings.append(f"{key}={text}")

    return " ".join(settings)
+ + Returns: + PGconn: Connected database connection + + Raises: + LibpqError: If connection fails + """ + + if "connect_timeout" not in opts: + t = int(remaining_timeout_fn()) + opts["connect_timeout"] = max(t, 1) + + conn_p = libpq_handle.PQconnectdb(connstr(opts).encode()) + + # Check connection status before adding to stack + if libpq_handle.PQstatus(conn_p) != ConnectionStatus.CONNECTION_OK: + error_msg = libpq_handle.PQerrorMessage(conn_p).decode() + # Manually close the failed connection + libpq_handle.PQfinish(conn_p) + raise LibpqError(error_msg) + + # Connection succeeded - add to stack for cleanup + conn = stack.enter_context(PGconn(libpq_handle, conn_p, stack=stack)) + return conn diff --git a/src/test/pytest/libpq/errors.py b/src/test/pytest/libpq/errors.py new file mode 100644 index 0000000000000..c665b663e221c --- /dev/null +++ b/src/test/pytest/libpq/errors.py @@ -0,0 +1,62 @@ +# Copyright (c) 2025, PostgreSQL Global Development Group + +""" +Exception classes for libpq errors. 
class LibpqError(RuntimeError):
    """
    Error raised for libpq failures, carrying PostgreSQL diagnostic fields.

    Every diagnostic attribute is optional and is None when it was not
    supplied to the constructor.
    """

    # SQLSTATE code and severity of the report
    sqlstate: Optional[str]
    severity: Optional[str]
    # Message parts
    primary: Optional[str]
    detail: Optional[str]
    hint: Optional[str]
    # Names of the database objects associated with the error
    schema_name: Optional[str]
    table_name: Optional[str]
    column_name: Optional[str]
    datatype_name: Optional[str]
    constraint_name: Optional[str]
    # Position and context information
    position: Optional[int]
    context: Optional[str]

    def __init__(
        self,
        message: str,
        *,
        sqlstate: Optional[str] = None,
        severity: Optional[str] = None,
        primary: Optional[str] = None,
        detail: Optional[str] = None,
        hint: Optional[str] = None,
        schema_name: Optional[str] = None,
        table_name: Optional[str] = None,
        column_name: Optional[str] = None,
        datatype_name: Optional[str] = None,
        constraint_name: Optional[str] = None,
        position: Optional[int] = None,
        context: Optional[str] = None,
    ):
        """
        Args:
            message: Text handed to RuntimeError; this is what str(err) shows.

        All remaining arguments are keyword-only and are stored unchanged on
        the attribute of the same name.
        """
        super().__init__(message)

        self.sqlstate, self.severity = sqlstate, severity
        self.primary, self.detail, self.hint = primary, detail, hint
        self.schema_name, self.table_name = schema_name, table_name
        self.column_name, self.datatype_name = column_name, datatype_name
        self.constraint_name = constraint_name
        self.position, self.context = position, context

    @property
    def sqlstate_class(self) -> Optional[str]:
        """
        Returns the 2-character SQLSTATE class, or None when no SQLSTATE of
        at least two characters is available.
        """
        code = self.sqlstate
        if not code or len(code) < 2:
            return None
        return code[:2]
def require_test_extras(*keys: str):
    """
    A convenience annotation which skips tests unless every one of the given
    keys is present in PG_TEST_EXTRA.

    To skip a particular test function or class:

        @pypg.require_test_extras("ldap")
        def test_some_ldap_feature():
            ...

    To skip an entire module:

        pytestmark = pypg.require_test_extras("ssl", "kerberos")

    Args:
        *keys: PG_TEST_EXTRA entries that must all be present.

    Returns:
        A pytest.mark.skipif marker. Its condition is computed from the
        environment at the time this function is called.
    """
    return pytest.mark.skipif(
        # A generator lets all() stop at the first missing key
        not all(_has_test_extra(k) for k in keys),
        reason=_test_extra_skip_reason(*keys),
    )
def test_timeout_default() -> int:
    """
    Returns the value of the PG_TEST_TIMEOUT_DEFAULT environment variable, in
    seconds. Falls back to 180 when the variable is unset or empty, and also
    (after logging a warning) when it cannot be parsed as an integer.
    """
    fallback = 180

    raw = os.getenv("PG_TEST_TIMEOUT_DEFAULT", "")
    if raw:
        try:
            return int(raw)
        except ValueError as err:
            logger.warning("PG_TEST_TIMEOUT_DEFAULT could not be parsed: " + str(err))

    return fallback
@pytest.fixture(scope="session")
def libpq_handle(libdir, bindir):
    """
    Loads a ctypes handle for libpq through load_libpq_handle(), which is
    expected to initialize some common function prototypes for general use.

    The test is skipped when the library cannot be loaded because of a
    "wrong ELF class" error. Any other OSError is propagated.
    """
    try:
        handle = load_libpq_handle(libdir, bindir)
    except OSError as err:
        arch_mismatch = "wrong ELF class" in str(err)
        if not arch_mismatch:
            raise
        # This happens in CI when trying to load a 32-bit libpq library
        # with a 64-bit Python
        pytest.skip("libpq architecture does not match Python interpreter")
    return handle
@pytest.fixture(scope="session")
def tmp_check(tmp_path_factory) -> pathlib.Path:
    """
    Returns the tmp_check directory that should be used for the tests.

    A non-empty TESTDATADIR environment variable takes precedence; otherwise
    a new "tmp_check" temporary directory is created in the pytest temp root.
    """
    override = os.getenv("TESTDATADIR")
    if override:
        return pathlib.Path(override)

    return tmp_path_factory.mktemp("tmp_check")
@pytest.fixture(scope="module")
def pg_server_module(pg_server_global):
    """
    Module-scoped server context, wrapping the global server in a
    subcontext() for the duration of the module. This is useful for
    overriding certain settings at the module level through autouse
    fixtures; an example of this is in the SSL tests.
    """
    with pg_server_global.subcontext() as module_server:
        yield module_server
@pytest.fixture(scope="module")
def _module_scoped_servers():
    """Module-scoped list to track servers created by create_pg_module."""
    servers: List[PostgresServer] = []
    return servers
@pytest.fixture(autouse=True)
def _set_module_server_timeouts(_module_scoped_servers, remaining_timeout):
    """Registers all module-scoped servers for this test.

    Every server in the module-scoped list enters start_new_test() with this
    test's remaining_timeout, and all of those contexts are left again once
    the test is over.

    It's hard to reliably detect whether a test uses a module-scoped server
    or not, so this simply assumes all tests in the module use them. There's
    little harm in registering servers for tests that don't use them.
    """
    per_test_contexts = contextlib.ExitStack()
    with per_test_contexts:
        for module_server in _module_scoped_servers:
            per_test_contexts.enter_context(
                module_server.start_new_test(remaining_timeout)
            )
        yield
class FileBackup(contextlib.AbstractContextManager):
    """
    A context manager which snapshots a file on entry and puts the snapshot
    back on exit. The contents the file had at exit time are kept in the
    backup file.
    """

    def __init__(self, file: pathlib.Path):
        super().__init__()

        self._file = file

    def __enter__(self):
        target = self._file

        # Reserve a uniquely named file next to the original; it is kept
        # (delete=False) so that it can receive the copy below.
        with tempfile.NamedTemporaryFile(
            prefix=target.name, dir=target.parent, delete=False
        ) as placeholder:
            self._backup = pathlib.Path(placeholder.name)

        shutil.copyfile(target, self._backup)

        return self

    def __exit__(self, *exc):
        # Swap the backup and the original file, so that the modified contents
        # can still be inspected in case of failure.
        backup = self._backup
        scratch = backup.with_name(backup.name + ".tmp")

        shutil.copyfile(self._file, scratch)  # stash the modified contents
        shutil.copyfile(backup, self._file)  # restore the original contents
        shutil.move(scratch, backup)  # backup now holds the modified contents
+ """ + + def __init__( + self, + name, + bindir, + datadir, + sockdir, + libpq_handle, + *, + hostaddr: Optional[str] = None, + port: Optional[int] = None, + ): + """ + Initialize a PostgreSQL server instance. Call start() to actually + start the server. + + Args: + name: The name of this server instance (for logging purposes) + bindir: Path to PostgreSQL bin directory + datadir: Path to data directory for this server + sockdir: Path to directory for Unix sockets + libpq_handle: ctypes handle to libpq + hostaddr: If provided, use this specific address (e.g., "127.0.0.2") + port: If provided, use this port instead of finding a free one, + is currently only allowed if hostaddr is also provided + """ + + if hostaddr is None and port is not None: + raise NotImplementedError("port was provided without hostaddr") + + self.name = name + self.datadir = datadir + self.sockdir = sockdir + self.libpq_handle = libpq_handle + self._remaining_timeout_fn: Optional[Callable[[], float]] = None + self._bindir = bindir + self._pg_ctl = bindir / "pg_ctl" + self.log = datadir / "postgresql.log" + self._log_start_pos = 0 + + # ExitStack for cleanup callbacks + self._cleanup_stack = contextlib.ExitStack() + + # Determine whether to use Unix sockets + use_unix_sockets = platform.system() != "Windows" and hostaddr is None + + # Use INITDB_TEMPLATE if available (much faster than running initdb) + initdb_template = os.environ.get("INITDB_TEMPLATE") + if initdb_template and os.path.isdir(initdb_template): + shutil.copytree(initdb_template, datadir) + else: + if platform.system() == "Windows": + auth_method = "trust" + else: + auth_method = "peer" + run( + bindir / "initdb", + "--no-sync", + "--auth", + auth_method, + "--pgdata", + self.datadir, + ) + + # Figure out a port to listen on. Attempt to reserve both IPv4 and IPv6 + # addresses in one go. + # + # Note: socket.has_dualstack_ipv6/create_server are only in Python 3.8+. 
+ if hostaddr is not None: + # Explicit address provided + addrs: list[str] = [hostaddr] + temp_sock = socket.socket() + if port is None: + temp_sock.bind((hostaddr, 0)) + _, port = temp_sock.getsockname() + + elif hasattr(socket, "has_dualstack_ipv6") and socket.has_dualstack_ipv6(): + addr = ("::1", 0) + temp_sock = socket.create_server( + addr, family=socket.AF_INET6, dualstack_ipv6=True + ) + + hostaddr, port, _, _ = temp_sock.getsockname() + assert hostaddr is not None + addrs = [hostaddr, "127.0.0.1"] + + else: + addr = ("127.0.0.1", 0) + + temp_sock = socket.socket() + temp_sock.bind(addr) + + hostaddr, port = temp_sock.getsockname() + assert hostaddr is not None + addrs = [hostaddr] + + # Store the computed values + self.hostaddr = hostaddr + self.port = port + # Including the host to use for connections - either the socket + # directory or TCP address + if use_unix_sockets: + self.host = str(sockdir) + else: + self.host = hostaddr + + with open(os.path.join(datadir, "postgresql.conf"), "a") as f: + print(file=f) + if use_unix_sockets: + print( + "unix_socket_directories = '{}'".format(sockdir.as_posix()), + file=f, + ) + else: + # Disable Unix sockets when using TCP to avoid lock conflicts + print("unix_socket_directories = ''", file=f) + print("listen_addresses = '{}'".format(",".join(addrs)), file=f) + print("port =", port, file=f) + print("log_connections = all", file=f) + print("fsync = off", file=f) + print("datestyle = 'ISO'", file=f) + print("timezone = 'UTC'", file=f) + + # Between closing of the socket, s, and server start, we're racing + # against anything that wants to open up ephemeral ports, so try not to + # put any new work here. 
def current_log_position(self):
    """
    Get the current end position of the log file, i.e. its size in bytes.
    Returns 0 when the log file does not exist.
    """
    log_file = self.log
    return log_file.stat().st_size if log_file.exists() else 0
def create_users(self, *userkeys: str):
    """
    Create one test user per key and register each of them for cleanup.

    The user for a key is named "<key>user". Each user is created through
    psql, and a matching DROP USER is pushed onto the current cleanup stack.

    Returns a dict mapping every key to the name of its user.
    """
    usermap = {key: key + "user" for key in userkeys}

    for key in userkeys:
        role = usermap[key]
        self.psql("-c", "CREATE USER " + role)
        # Registered only after a successful CREATE
        self._cleanup_stack.callback(self.psql, "-c", "DROP USER " + role)

    return usermap
@contextlib.contextmanager
def subcontext(self):
    """
    Create a new cleanup context for per-test isolation.

    Temporarily replaces the cleanup stack with a fresh one, so that any
    cleanup callbacks registered within this context are run when the
    context exits. The previous cleanup stack is always put back afterwards,
    even if the block raised or one of the cleanup callbacks failed.

    Yields:
        This server instance.
    """
    outer_stack = self._cleanup_stack
    inner_stack = contextlib.ExitStack()
    self._cleanup_stack = inner_stack
    try:
        yield self
    finally:
        try:
            # Unwind everything registered inside the context. close()
            # runs the callbacks without passing on exception details.
            inner_stack.close()
        finally:
            # Restore unconditionally: a failing cleanup must not leave the
            # server pointing at the already-unwound inner stack.
            self._cleanup_stack = outer_stack
+ + Args: + pattern: The regex pattern to search for. + times: If None, any number of matches is accepted. + If a number, exactly that many matches are required. + """ + start_pos = self.current_log_position() + yield + with open(self.log) as f: + f.seek(start_pos) + content = f.read() + if times is None: + assert re.search(pattern, content), f"Pattern {pattern!r} not found in log" + else: + match_count = len(re.findall(pattern, content)) + assert match_count == times, ( + f"Expected {times} matches of {pattern!r}, found {match_count}" + ) + + def cleanup(self): + """Run all registered cleanup callbacks.""" + self._cleanup_stack.close() + + def set_timeout(self, remaining_timeout_fn: Callable[[], float]) -> None: + """ + Set the timeout function for connections. + This is typically called by pg fixture for each test. + """ + self._remaining_timeout_fn = remaining_timeout_fn + + def connect(self, **opts) -> PGconn: + """ + Creates a connection to this PostgreSQL server instance. + + Args: + **opts: Additional connection options (can override defaults) + + Returns: + PGconn: Connected database connection + + Example: + conn = pg.connect() + conn = pg.connect(dbname='mydb') + """ + if self._remaining_timeout_fn is None: + raise RuntimeError( + "Timeout function not set. Use set_timeout() or pg fixture." 
+ ) + + defaults = { + "host": self.host, + "port": self.port, + "dbname": "postgres", + } + defaults.update(opts) + + return libpq_connect( + self.libpq_handle, + self._cleanup_stack, + self._remaining_timeout_fn, + **defaults, + ) diff --git a/src/test/pytest/pypg/util.py b/src/test/pytest/pypg/util.py new file mode 100644 index 0000000000000..d46a73d001969 --- /dev/null +++ b/src/test/pytest/pypg/util.py @@ -0,0 +1,55 @@ +# Copyright (c) 2025, PostgreSQL Global Development Group + +import shlex +import subprocess +import sys + + +def eprint(*args, **kwargs): + """eprint prints to stderr""" + print(*args, file=sys.stderr, **kwargs) + + +def run(*command, check=True, shell=None, silent=False, **kwargs): + """run runs the given command and prints it to stderr""" + + __tracebackhide__ = True # Don't show in pytest stack traces + + if shell is None: + shell = len(command) == 1 and isinstance(command[0], str) + + if shell: + command = command[0] + else: + command = list(map(str, command)) + + if not silent: + if shell: + eprint(f"+ {command}") + else: + # We could normally use shlex.join here, but it's not available in + # Python 3.6 which we still like to support + unsafe_string_cmd = " ".join(map(shlex.quote, command)) + eprint(f"+ {unsafe_string_cmd}") + + if silent: + kwargs.setdefault("stdout", subprocess.DEVNULL) + + result = subprocess.run(command, check=False, shell=shell, **kwargs) + + # Manually throw CalledProcessError to avoid subprocess.run's huge body + # polluting stack traces. 
+ if check and result.returncode: + raise subprocess.CalledProcessError( + result.returncode, command, result.stdout, result.stderr + ) + + return result + + +def capture(command, *args, stdout=subprocess.PIPE, encoding="utf-8", **kwargs): + __tracebackhide__ = True # Don't pollute pytest stack traces + + return run( + command, *args, stdout=stdout, encoding=encoding, **kwargs + ).stdout.removesuffix("\n") diff --git a/src/test/pytest/pyt/conftest.py b/src/test/pytest/pyt/conftest.py new file mode 100644 index 0000000000000..dd73917c68ceb --- /dev/null +++ b/src/test/pytest/pyt/conftest.py @@ -0,0 +1 @@ +# Copyright (c) 2025, PostgreSQL Global Development Group diff --git a/src/test/pytest/pyt/test_errors.py b/src/test/pytest/pyt/test_errors.py new file mode 100644 index 0000000000000..771fe8f76e362 --- /dev/null +++ b/src/test/pytest/pyt/test_errors.py @@ -0,0 +1,34 @@ +# Copyright (c) 2025, PostgreSQL Global Development Group + +""" +Tests for libpq error types and SQLSTATE-based exception mapping. +""" + +import pytest +from libpq import LibpqError + + +def test_syntax_error(conn): + """Invalid SQL syntax raises LibpqError with correct SQLSTATE.""" + with pytest.raises(LibpqError) as exc_info: + conn.sql("SELEC 1") + + err = exc_info.value + assert err.sqlstate == "42601" + assert err.sqlstate_class == "42" + assert "syntax" in str(err).lower() + + +def test_unique_violation(conn): + """Unique violation includes all error fields.""" + conn.sql("CREATE TEMP TABLE test_uv (id int CONSTRAINT test_uv_pk PRIMARY KEY)") + conn.sql("INSERT INTO test_uv VALUES (1)") + + with pytest.raises(LibpqError) as exc_info: + conn.sql("INSERT INTO test_uv VALUES (1)") + + err = exc_info.value + assert err.sqlstate == "23505" + assert err.table_name == "test_uv" + assert err.constraint_name == "test_uv_pk" + assert err.detail == "Key (id)=(1) already exists." 
diff --git a/src/test/pytest/pyt/test_libpq.py b/src/test/pytest/pyt/test_libpq.py new file mode 100644 index 0000000000000..1d0d9bc3b94cc --- /dev/null +++ b/src/test/pytest/pyt/test_libpq.py @@ -0,0 +1,35 @@ +# Copyright (c) 2025, PostgreSQL Global Development Group + +import contextlib +import os +import socket +import struct +import threading +from typing import Callable + +import pytest + +from libpq import connstr, LibpqError + + +@pytest.mark.parametrize( + "opts, expected", + [ + (dict(), ""), + (dict(port=5432), "port=5432"), + (dict(port=5432, dbname="postgres"), "port=5432 dbname=postgres"), + (dict(host=""), "host=''"), + (dict(host=" "), r"host=' '"), + (dict(keyword="'"), r"keyword=\'"), + (dict(keyword=" \\' "), r"keyword=' \\\' '"), + ], +) +def test_connstr(opts, expected): + """Tests the escape behavior for connstr().""" + assert connstr(opts) == expected + + +def test_must_connect_errors(connect): + """Tests that connect() raises LibpqError.""" + with pytest.raises(LibpqError, match="invalid connection option"): + connect(some_unknown_keyword="whatever") diff --git a/src/test/pytest/pyt/test_multi_server.py b/src/test/pytest/pyt/test_multi_server.py new file mode 100644 index 0000000000000..8ee045b0cc8dd --- /dev/null +++ b/src/test/pytest/pyt/test_multi_server.py @@ -0,0 +1,46 @@ +# Copyright (c) 2025, PostgreSQL Global Development Group + +""" +Tests demonstrating multi-server functionality using create_pg fixture. + +These tests verify that the pytest infrastructure correctly handles +multiple PostgreSQL server instances within a single test, and that +module-scoped servers persist across tests. 
+""" + +import pytest + + +def test_multiple_servers_basic(create_pg): + """Test that we can create and connect to multiple servers.""" + node1 = create_pg("primary") + node2 = create_pg("secondary") + + conn1 = node1.connect() + conn2 = node2.connect() + + # Each server should have its own data directory + datadir1 = conn1.sql("SHOW data_directory") + datadir2 = conn2.sql("SHOW data_directory") + assert datadir1 != datadir2 + + # Each server should be listening on a different port + assert node1.port != node2.port + + +@pytest.fixture(scope="module") +def shared_server(create_pg_module): + """A server shared across all tests in this module.""" + server = create_pg_module("shared") + server.sql("CREATE TABLE module_state (value int DEFAULT 0)") + return server + + +def test_module_server_create_row(shared_server): + """First test: create a row in the shared server.""" + shared_server.connect().sql("INSERT INTO module_state VALUES (42)") + + +def test_module_server_see_row(shared_server): + """Second test: verify we see the row from the previous test.""" + assert shared_server.connect().sql("SELECT value FROM module_state") == 42 diff --git a/src/test/pytest/pyt/test_query_helpers.py b/src/test/pytest/pyt/test_query_helpers.py new file mode 100644 index 0000000000000..abcd90842142b --- /dev/null +++ b/src/test/pytest/pyt/test_query_helpers.py @@ -0,0 +1,347 @@ +# Copyright (c) 2025, PostgreSQL Global Development Group + +""" +Tests for query helper functions with type conversion and result simplification. 
+""" + +import uuid + +import pytest + + +def test_single_cell_int(conn): + """Single cell integer query returns just the value.""" + result = conn.sql("SELECT 1") + assert result == 1 + assert isinstance(result, int) + + +def test_single_cell_string(conn): + """Single cell string query returns just the value.""" + result = conn.sql("SELECT 'hello'") + assert result == "hello" + assert isinstance(result, str) + + +def test_single_cell_bool(conn): + """Single cell boolean query returns just the value.""" + + result = conn.sql("SELECT true") + assert result is True + assert isinstance(result, bool) + + result = conn.sql("SELECT false") + assert result is False + + +def test_single_cell_float(conn): + """Single cell float query returns just the value.""" + + result = conn.sql("SELECT 3.14::float4") + assert isinstance(result, float) + assert abs(result - 3.14) < 0.01 + + +def test_single_cell_null(conn): + """Single cell NULL query returns None.""" + + result = conn.sql("SELECT NULL") + assert result is None + + +def test_single_row_multiple_columns(conn): + """Single row with multiple columns returns a tuple.""" + + result = conn.sql("SELECT 1, 'hello', true") + assert result == (1, "hello", True) + assert isinstance(result, tuple) + + +def test_single_column_multiple_rows(conn): + """Single column with multiple rows returns a list of values.""" + + result = conn.sql("SELECT * FROM generate_series(1, 3)") + assert result == [1, 2, 3] + assert isinstance(result, list) + + +def test_multiple_rows_and_columns(conn): + """Multiple rows and columns returns list of tuples.""" + + result = conn.sql("SELECT * FROM (VALUES (1, 'a'), (2, 'b'), (3, 'c')) AS t") + assert result == [(1, "a"), (2, "b"), (3, "c")] + assert isinstance(result, list) + assert all(isinstance(row, tuple) for row in result) + + +def test_empty_result(conn): + """Empty result set returns empty list.""" + + result = conn.sql("SELECT 1 WHERE false") + assert result == [] + + +def 
test_query_error_handling(conn): + """Query errors raise RuntimeError with actual error message.""" + + with pytest.raises(RuntimeError) as exc_info: + conn.sql("SELECT * FROM nonexistent_table") + + error_msg = str(exc_info.value) + assert "nonexistent_table" in error_msg or "does not exist" in error_msg + + +def test_division_by_zero_error(conn): + """Division by zero raises RuntimeError.""" + + with pytest.raises(RuntimeError) as exc_info: + conn.sql("SELECT 1/0") + + error_msg = str(exc_info.value) + assert "division by zero" in error_msg.lower() + + +def test_simple_exec_create_table(conn): + """sql for CREATE TABLE returns None.""" + + result = conn.sql("CREATE TEMP TABLE test_table (id int, name text)") + assert result is None + + # Verify table was created + count = conn.sql("SELECT COUNT(*) FROM test_table") + assert count == 0 + + +def test_simple_exec_insert(conn): + """sql for INSERT returns None.""" + + conn.sql("CREATE TEMP TABLE test_table (id int, name text)") + result = conn.sql("INSERT INTO test_table VALUES (1, 'Alice'), (2, 'Bob')") + assert result is None + + # Verify data was inserted + count = conn.sql("SELECT COUNT(*) FROM test_table") + assert count == 2 + + +def test_type_conversion_mixed(conn): + """Test mixed type conversion in a single row.""" + + result = conn.sql("SELECT 42::int4, 123::int8, 3.14::float8, 'text', true, NULL") + assert result == (42, 123, 3.14, "text", True, None) + assert isinstance(result[0], int) + assert isinstance(result[1], int) + assert isinstance(result[2], float) + assert isinstance(result[3], str) + assert isinstance(result[4], bool) + assert result[5] is None + + +def test_multiple_queries_same_connection(conn): + """Test running multiple queries on the same connection.""" + + result1 = conn.sql("SELECT 1") + assert result1 == 1 + + result2 = conn.sql("SELECT 'hello', 'world'") + assert result2 == ("hello", "world") + + result3 = conn.sql("SELECT * FROM generate_series(1, 5)") + assert result3 == [1, 2, 3, 
4, 5] + + +def test_date_type(conn): + """Test date type conversion.""" + import datetime + + result = conn.sql("SELECT '2025-10-20'::date") + assert result == datetime.date(2025, 10, 20) + assert isinstance(result, datetime.date) + + +def test_timestamp_type(conn): + """Test timestamp type conversion.""" + import datetime + + result = conn.sql("SELECT '2025-10-20 15:30:45'::timestamp") + assert result == datetime.datetime(2025, 10, 20, 15, 30, 45) + assert isinstance(result, datetime.datetime) + + +def test_time_type(conn): + """Test time type conversion.""" + import datetime + + result = conn.sql("SELECT '15:30:45'::time") + assert result == datetime.time(15, 30, 45) + assert isinstance(result, datetime.time) + + +def test_numeric_type(conn): + """Test numeric/decimal type conversion.""" + import decimal + + result = conn.sql("SELECT 123.456::numeric") + assert result == decimal.Decimal("123.456") + assert isinstance(result, decimal.Decimal) + + +def test_int_array(conn): + """Test integer array type conversion.""" + + result = conn.sql("SELECT ARRAY[1, 2, 3, 4, 5]") + assert result == [1, 2, 3, 4, 5] + assert isinstance(result, list) + assert all(isinstance(x, int) for x in result) + + +def test_text_array(conn): + """Test text array type conversion.""" + + result = conn.sql("SELECT ARRAY['hello', 'world', 'test']") + assert result == ["hello", "world", "test"] + assert isinstance(result, list) + assert all(isinstance(x, str) for x in result) + + +def test_bool_array(conn): + """Test boolean array type conversion.""" + + result = conn.sql("SELECT ARRAY[true, false, true]") + assert result == [True, False, True] + assert isinstance(result, list) + assert all(isinstance(x, bool) for x in result) + + +def test_empty_array(conn): + """Test empty array type conversion.""" + + result = conn.sql("SELECT ARRAY[]::int[]") + assert result == [] + assert isinstance(result, list) + + +def test_json_type(conn): + """Test JSON type (parsed to dict).""" + + result = 
conn.sql('SELECT \'{"key": "value"}\'::json') + assert isinstance(result, dict) + assert result == {"key": "value"} + + +def test_jsonb_type(conn): + """Test JSONB type (parsed to dict).""" + + result = conn.sql('SELECT \'{"name": "test", "count": 42}\'::jsonb') + assert isinstance(result, dict) + assert result == {"name": "test", "count": 42} + + +def test_json_array(conn): + """Test JSON array type.""" + + result = conn.sql("SELECT '[1, 2, 3, 4, 5]'::json") + assert isinstance(result, list) + assert result == [1, 2, 3, 4, 5] + + +def test_json_nested(conn): + """Test nested JSON object.""" + + result = conn.sql( + 'SELECT \'{"user": {"id": 1, "name": "Alice"}, "active": true}\'::json' + ) + assert isinstance(result, dict) + assert result == {"user": {"id": 1, "name": "Alice"}, "active": True} + + +def test_mixed_types_with_arrays(conn): + """Test mixed types including arrays in a single row.""" + + result = conn.sql("SELECT 42, 'text', ARRAY[1, 2, 3], true") + assert result == (42, "text", [1, 2, 3], True) + assert isinstance(result[0], int) + assert isinstance(result[1], str) + assert isinstance(result[2], list) + assert isinstance(result[3], bool) + + +def test_uuid_type(conn): + """Test UUID type conversion.""" + test_uuid = "550e8400-e29b-41d4-a716-446655440000" + result = conn.sql(f"SELECT '{test_uuid}'::uuid") + assert result == uuid.UUID(test_uuid) + assert isinstance(result, uuid.UUID) + + +def test_uuid_generation(conn): + """Test generated UUID type conversion.""" + result = conn.sql("SELECT uuidv4()") + assert isinstance(result, uuid.UUID) + # Check it's a valid UUID by ensuring it can be converted to string + assert len(str(result)) == 36 # UUID string format length + + +def test_text_array_with_commas(conn): + """Test text array with elements containing commas.""" + + result = conn.sql("SELECT ARRAY['A,B', 'C', ' D ']") + assert result == ["A,B", "C", " D "] + + +def test_text_array_with_quotes(conn): + """Test text array with elements containing 
quotes.""" + + result = conn.sql(r"SELECT ARRAY[E'a\"b', 'c']") + assert result == ['a"b', "c"] + + +def test_text_array_with_backslash(conn): + """Test text array with elements containing backslashes.""" + + result = conn.sql(r"SELECT ARRAY[E'a\\b', 'c']") + assert result == ["a\\b", "c"] + + +def test_json_array_type(conn): + """Test array of JSON values with embedded quotes and commas.""" + + result = conn.sql("""SELECT ARRAY['{"abc": 123, "xyz": 456}'::json]""") + assert result == [{"abc": 123, "xyz": 456}] + + +def test_json_array_multiple(conn): + """Test array of multiple JSON objects.""" + + result = conn.sql( + """SELECT ARRAY['{"a": 1}'::json, '{"b": 2}'::json, '["x", "y"]'::json]""" + ) + assert result == [{"a": 1}, {"b": 2}, ["x", "y"]] + + +def test_2d_int_array(conn): + """Test 2D integer array.""" + + result = conn.sql("SELECT ARRAY[[1,2],[3,4]]") + assert result == [[1, 2], [3, 4]] + + +def test_2d_text_array(conn): + """Test 2D text array.""" + + result = conn.sql("SELECT ARRAY[['a','b'],['c','d,e']]") + assert result == [["a", "b"], ["c", "d,e"]] + + +def test_3d_int_array(conn): + """Test 3D integer array.""" + + result = conn.sql("SELECT ARRAY[[[1,2],[3,4]],[[5,6],[7,8]]]") + assert result == [[[1, 2], [3, 4]], [[5, 6], [7, 8]]] + + +def test_array_with_null(conn): + """Test array with NULL elements.""" + + result = conn.sql("SELECT ARRAY[1, NULL, 3]") + assert result == [1, None, 3] From 1d485d6cf0794f071919a1a1449b3e8adee9659d Mon Sep 17 00:00:00 2001 From: Jelte Fennema-Nio Date: Fri, 26 Dec 2025 12:31:43 +0100 Subject: [PATCH 05/36] POC: Convert load balance tests from perl to python This is a proof of concept to show how to use the pytest test infrastructure. It converts two existing tests that could not share code. And now they do. If we ever introduce another load balance method (e.g. round robin). We can easily test it for both DNS and hostlist based load balancing by adding a single new test function. 
--- src/interfaces/libpq/Makefile | 1 + src/interfaces/libpq/meson.build | 7 +- src/interfaces/libpq/pyt/test_load_balance.py | 170 ++++++++++++++++++ .../libpq/t/003_load_balance_host_list.pl | 94 ---------- .../libpq/t/004_load_balance_dns.pl | 144 --------------- 5 files changed, 176 insertions(+), 240 deletions(-) create mode 100644 src/interfaces/libpq/pyt/test_load_balance.py delete mode 100644 src/interfaces/libpq/t/003_load_balance_host_list.pl delete mode 100644 src/interfaces/libpq/t/004_load_balance_dns.pl diff --git a/src/interfaces/libpq/Makefile b/src/interfaces/libpq/Makefile index 0963995eed422..d088142f8c601 100644 --- a/src/interfaces/libpq/Makefile +++ b/src/interfaces/libpq/Makefile @@ -169,6 +169,7 @@ check installcheck: export PATH := $(CURDIR)/test:$(PATH) check: test-build all $(prove_check) + $(pytest_check) installcheck: test-build all $(prove_installcheck) diff --git a/src/interfaces/libpq/meson.build b/src/interfaces/libpq/meson.build index b0ae72167a1ca..62cde97d16931 100644 --- a/src/interfaces/libpq/meson.build +++ b/src/interfaces/libpq/meson.build @@ -157,8 +157,6 @@ tests += { 'tests': [ 't/001_uri.pl', 't/002_api.pl', - 't/003_load_balance_host_list.pl', - 't/004_load_balance_dns.pl', 't/005_negotiate_encryption.pl', 't/006_service.pl', ], @@ -169,6 +167,11 @@ tests += { }, 'deps': libpq_test_deps, }, + 'pytest': { + 'tests': [ + 'pyt/test_load_balance.py', + ], + }, } subdir('po', if_found: libintl) diff --git a/src/interfaces/libpq/pyt/test_load_balance.py b/src/interfaces/libpq/pyt/test_load_balance.py new file mode 100644 index 0000000000000..0af46d8f37ded --- /dev/null +++ b/src/interfaces/libpq/pyt/test_load_balance.py @@ -0,0 +1,170 @@ +# Copyright (c) 2025, PostgreSQL Global Development Group + +""" +Tests for load_balance_hosts connection parameter. + +These tests verify that libpq correctly handles load balancing across multiple +PostgreSQL servers specified in the connection string. 
+""" + +import platform +import re + +import pytest + +from libpq import LibpqError +import pypg + + +@pytest.fixture(scope="module") +def load_balance_nodes_hostlist(create_pg_module): + """ + Create 3 PostgreSQL nodes with different socket directories. + + Each node has its own Unix socket directory for isolation. + Returns a tuple of (nodes, connect). + """ + nodes = [create_pg_module() for _ in range(3)] + + hostlist = ",".join(node.host for node in nodes) + portlist = ",".join(str(node.port) for node in nodes) + + def connect(**kwargs): + return nodes[0].connect(host=hostlist, port=portlist, **kwargs) + + return nodes, connect + + +@pytest.fixture(scope="module") +def load_balance_nodes_dns(create_pg_module): + """ + Create 3 PostgreSQL nodes on the same port but different IP addresses. + + Uses 127.0.0.1, 127.0.0.2, 127.0.0.3 with a shared port, so that + connections to 'pg-loadbalancetest' can be load balanced via DNS. + + Since setting up a DNS server is more effort than we consider reasonable to + run this test, this situation is instead imitated by using a hosts file + where a single hostname maps to multiple different IP addresses. This test + requires the administrator to add the following lines to the hosts file (if + we detect that this hasn't happened we skip the test): + + 127.0.0.1 pg-loadbalancetest + 127.0.0.2 pg-loadbalancetest + 127.0.0.3 pg-loadbalancetest + + Windows or Linux are required to run this test because these OSes allow + binding to 127.0.0.2 and 127.0.0.3 addresses by default, but other OSes + don't. We need to bind to different IP addresses, so that we can use these + different IP addresses in the hosts file. + + The hosts file needs to be prepared before running this test. We don't do + it on the fly, because it requires root permissions to change the hosts + file. In CI we set up the previously mentioned rules in the hosts file, so + that this load balancing method is tested. 
+ + Requires PG_TEST_EXTRA=load_balance because it requires this manual hosts + file configuration and also uses TCP with trust auth, which is potentially + unsafe on multiuser systems. + """ + pypg.skip_unless_test_extras("load_balance") + + if platform.system() not in ("Linux", "Windows"): + pytest.skip("DNS load balance test only supported on Linux and Windows") + + if platform.system() == "Windows": + hosts_path = r"c:\Windows\System32\Drivers\etc\hosts" + else: + hosts_path = "/etc/hosts" + + try: + with open(hosts_path) as f: + hosts_content = f.read() + except (OSError, IOError): + pytest.skip(f"Could not read hosts file: {hosts_path}") + + count = len(re.findall(r"127\.0\.0\.[1-3]\s+pg-loadbalancetest", hosts_content)) + if count != 3: + pytest.skip("hosts file not prepared for DNS load balance test") + + first_node = create_pg_module(hostaddr="127.0.0.1") + nodes = [ + first_node, + create_pg_module(hostaddr="127.0.0.2", port=first_node.port), + create_pg_module(hostaddr="127.0.0.3", port=first_node.port), + ] + + # Allow trust authentication for TCP connections from loopback + for node in nodes: + hba_path = node.datadir / "pg_hba.conf" + with open(hba_path, "r") as f: + original_content = f.read() + with open(hba_path, "w") as f: + f.write("host all all 127.0.0.0/8 trust\n") + f.write(original_content) + node.pg_ctl("reload") + + def connect(**kwargs): + return nodes[0].connect(host="pg-loadbalancetest", **kwargs) + + return nodes, connect + + +@pytest.fixture(scope="module", params=["hostlist", "dns"]) +def load_balance_nodes(request): + """ + Parametrized fixture providing both load balancing test environments. 
+ """ + return request.getfixturevalue(f"load_balance_nodes_{request.param}") + + +def test_load_balance_hosts_invalid_value(load_balance_nodes): + """load_balance_hosts doesn't accept unknown values.""" + _, connect = load_balance_nodes + + with pytest.raises( + LibpqError, match='invalid load_balance_hosts value: "doesnotexist"' + ): + connect(load_balance_hosts="doesnotexist") + + +def test_load_balance_hosts_disable(load_balance_nodes): + """load_balance_hosts=disable always connects to the first node.""" + nodes, connect = load_balance_nodes + + with nodes[0].log_contains("connection received"): + connect(load_balance_hosts="disable") + + +def test_load_balance_hosts_random_distribution(load_balance_nodes): + """load_balance_hosts=random distributes connections across all nodes.""" + nodes, connect = load_balance_nodes + + for _ in range(50): + connect(load_balance_hosts="random") + + occurrences = [ + len(re.findall("connection received", node.log_content())) for node in nodes + ] + + # Statistically, each node should receive at least one connection. 
+ # The probability of any node receiving 0 connections is (2/3)^50 ≈ 1.57e-9 + assert occurrences[0] > 0, "node1 should receive at least one connection" + assert occurrences[1] > 0, "node2 should receive at least one connection" + assert occurrences[2] > 0, "node3 should receive at least one connection" + assert sum(occurrences) == 50, "total connections should be 50" + + +def test_load_balance_hosts_failover(load_balance_nodes): + """load_balance_hosts continues trying hosts until it finds a working one.""" + nodes, connect = load_balance_nodes + + nodes[0].stop() + nodes[1].stop() + + with nodes[2].log_contains("connection received"): + connect(load_balance_hosts="disable") + + with nodes[2].log_contains("connection received", times=5): + for _ in range(5): + connect(load_balance_hosts="random") diff --git a/src/interfaces/libpq/t/003_load_balance_host_list.pl b/src/interfaces/libpq/t/003_load_balance_host_list.pl deleted file mode 100644 index 1f970ff994b51..0000000000000 --- a/src/interfaces/libpq/t/003_load_balance_host_list.pl +++ /dev/null @@ -1,94 +0,0 @@ -# Copyright (c) 2023-2026, PostgreSQL Global Development Group -use strict; -use warnings FATAL => 'all'; -use Config; -use PostgreSQL::Test::Utils; -use PostgreSQL::Test::Cluster; -use Test::More; - -# This tests load balancing across the list of different hosts in the host -# parameter of the connection string. - -# Cluster setup which is shared for testing both load balancing methods -my $node1 = PostgreSQL::Test::Cluster->new('node1'); -my $node2 = PostgreSQL::Test::Cluster->new('node2', own_host => 1); -my $node3 = PostgreSQL::Test::Cluster->new('node3', own_host => 1); - -# Create a data directory with initdb -$node1->init(); -$node2->init(); -$node3->init(); - -# Start the PostgreSQL server -$node1->start(); -$node2->start(); -$node3->start(); - -# Start the tests for load balancing method 1 -my $hostlist = $node1->host . ',' . $node2->host . ',' . $node3->host; -my $portlist = $node1->port . 
',' . $node2->port . ',' . $node3->port; - -$node1->connect_fails( - "host=$hostlist port=$portlist load_balance_hosts=doesnotexist", - "load_balance_hosts doesn't accept unknown values", - expected_stderr => qr/invalid load_balance_hosts value: "doesnotexist"/); - -# load_balance_hosts=disable should always choose the first one. -$node1->connect_ok( - "host=$hostlist port=$portlist load_balance_hosts=disable", - "load_balance_hosts=disable connects to the first node", - sql => "SELECT 'connect1'", - log_like => [qr/statement: SELECT 'connect1'/]); - -# Statistically the following loop with load_balance_hosts=random will almost -# certainly connect at least once to each of the nodes. The chance of that not -# happening is so small that it's negligible: (2/3)^50 = 1.56832855e-9 -foreach my $i (1 .. 50) -{ - $node1->connect_ok( - "host=$hostlist port=$portlist load_balance_hosts=random", - "repeated connections with random load balancing", - sql => "SELECT 'connect2'"); -} - -my $node1_occurrences = () = - $node1->log_content() =~ /statement: SELECT 'connect2'/g; -my $node2_occurrences = () = - $node2->log_content() =~ /statement: SELECT 'connect2'/g; -my $node3_occurrences = () = - $node3->log_content() =~ /statement: SELECT 'connect2'/g; - -my $total_occurrences = - $node1_occurrences + $node2_occurrences + $node3_occurrences; - -cmp_ok($node1_occurrences, '>', 1, - "received at least one connection on node1"); -cmp_ok($node2_occurrences, '>', 1, - "received at least one connection on node2"); -cmp_ok($node3_occurrences, '>', 1, - "received at least one connection on node3"); -is($total_occurrences, 50, "received 50 connections across all nodes"); - -$node1->stop(); -$node2->stop(); - -# load_balance_hosts=disable should continue trying hosts until it finds a -# working one. 
-$node3->connect_ok( - "host=$hostlist port=$portlist load_balance_hosts=disable", - "load_balance_hosts=disable continues until it connects to the a working node", - sql => "SELECT 'connect3'", - log_like => [qr/statement: SELECT 'connect3'/]); - -# Also with load_balance_hosts=random we continue to the next nodes if previous -# ones are down. Connect a few times to make sure it's not just lucky. -foreach my $i (1 .. 5) -{ - $node3->connect_ok( - "host=$hostlist port=$portlist load_balance_hosts=random", - "load_balance_hosts=random continues until it connects to the a working node", - sql => "SELECT 'connect4'", - log_like => [qr/statement: SELECT 'connect4'/]); -} - -done_testing(); diff --git a/src/interfaces/libpq/t/004_load_balance_dns.pl b/src/interfaces/libpq/t/004_load_balance_dns.pl deleted file mode 100644 index e1ff9a0602480..0000000000000 --- a/src/interfaces/libpq/t/004_load_balance_dns.pl +++ /dev/null @@ -1,144 +0,0 @@ -# Copyright (c) 2023-2026, PostgreSQL Global Development Group -use strict; -use warnings FATAL => 'all'; -use Config; -use PostgreSQL::Test::Utils; -use PostgreSQL::Test::Cluster; -use Test::More; - -if (!$ENV{PG_TEST_EXTRA} || $ENV{PG_TEST_EXTRA} !~ /\bload_balance\b/) -{ - plan skip_all => - 'Potentially unsafe test load_balance not enabled in PG_TEST_EXTRA'; -} - -# This tests loadbalancing based on a DNS entry that contains multiple records -# for different IPs. Since setting up a DNS server is more effort than we -# consider reasonable to run this test, this situation is instead imitated by -# using a hosts file where a single hostname maps to multiple different IP -# addresses. 
This test requires the administrator to add the following lines to -# the hosts file (if we detect that this hasn't happened we skip the test): -# -# 127.0.0.1 pg-loadbalancetest -# 127.0.0.2 pg-loadbalancetest -# 127.0.0.3 pg-loadbalancetest -# -# Windows or Linux are required to run this test because these OSes allow -# binding to 127.0.0.2 and 127.0.0.3 addresses by default, but other OSes -# don't. We need to bind to different IP addresses, so that we can use these -# different IP addresses in the hosts file. -# -# The hosts file needs to be prepared before running this test. We don't do it -# on the fly, because it requires root permissions to change the hosts file. In -# CI we set up the previously mentioned rules in the hosts file, so that this -# load balancing method is tested. - -# Cluster setup which is shared for testing both load balancing methods -my $can_bind_to_127_0_0_2 = - $Config{osname} eq 'linux' || $PostgreSQL::Test::Utils::windows_os; - -# Checks for the requirements for testing load balancing method 2 -if (!$can_bind_to_127_0_0_2) -{ - plan skip_all => 'load_balance test only supported on Linux and Windows'; -} - -my $hosts_path; -if ($windows_os) -{ - $hosts_path = 'c:\Windows\System32\Drivers\etc\hosts'; -} -else -{ - $hosts_path = '/etc/hosts'; -} - -my $hosts_content = PostgreSQL::Test::Utils::slurp_file($hosts_path); - -my $hosts_count = () = - $hosts_content =~ /127\.0\.0\.[1-3] pg-loadbalancetest/g; -if ($hosts_count != 3) -{ - # Host file is not prepared for this test - plan skip_all => "hosts file was not prepared for DNS load balance test"; -} - -$PostgreSQL::Test::Cluster::use_tcp = 1; -$PostgreSQL::Test::Cluster::test_pghost = '127.0.0.1'; -my $port = PostgreSQL::Test::Cluster::get_free_port(); -my $node1 = PostgreSQL::Test::Cluster->new('node1', port => $port); -my $node2 = - PostgreSQL::Test::Cluster->new('node2', port => $port, own_host => 1); -my $node3 = - PostgreSQL::Test::Cluster->new('node3', port => $port, own_host => 
1); - -# Create a data directory with initdb -$node1->init(); -$node2->init(); -$node3->init(); - -# Start the PostgreSQL server -$node1->start(); -$node2->start(); -$node3->start(); - -# load_balance_hosts=disable should always choose the first one. -$node1->connect_ok( - "host=pg-loadbalancetest port=$port load_balance_hosts=disable", - "load_balance_hosts=disable connects to the first node", - sql => "SELECT 'connect1'", - log_like => [qr/statement: SELECT 'connect1'/]); - - -# Statistically the following loop with load_balance_hosts=random will almost -# certainly connect at least once to each of the nodes. The chance of that not -# happening is so small that it's negligible: (2/3)^50 = 1.56832855e-9 -foreach my $i (1 .. 50) -{ - $node1->connect_ok( - "host=pg-loadbalancetest port=$port load_balance_hosts=random", - "repeated connections with random load balancing", - sql => "SELECT 'connect2'"); -} - -my $node1_occurrences = () = - $node1->log_content() =~ /statement: SELECT 'connect2'/g; -my $node2_occurrences = () = - $node2->log_content() =~ /statement: SELECT 'connect2'/g; -my $node3_occurrences = () = - $node3->log_content() =~ /statement: SELECT 'connect2'/g; - -my $total_occurrences = - $node1_occurrences + $node2_occurrences + $node3_occurrences; - -cmp_ok($node1_occurrences, '>', 1, - "received at least one connection on node1"); -cmp_ok($node2_occurrences, '>', 1, - "received at least one connection on node2"); -cmp_ok($node3_occurrences, '>', 1, - "received at least one connection on node3"); -is($total_occurrences, 50, "received 50 connections across all nodes"); - -$node1->stop(); -$node2->stop(); - -# load_balance_hosts=disable should continue trying hosts until it finds a -# working one. 
-$node3->connect_ok( - "host=pg-loadbalancetest port=$port load_balance_hosts=disable", - "load_balance_hosts=disable continues until it connects to a working node", - sql => "SELECT 'connect3'", - log_like => [qr/statement: SELECT 'connect3'/]); - -# Also with load_balance_hosts=random we continue to the next nodes if previous -# ones are down. Connect a few times to make sure it's not just lucky. -foreach my $i (1 .. 5) -{ - $node3->connect_ok( - "host=pg-loadbalancetest port=$port load_balance_hosts=random", - "load_balance_hosts=random continues until it connects to a working node", - sql => "SELECT 'connect4'", - log_like => [qr/statement: SELECT 'connect4'/]); -} - -done_testing(); From 754d98ad8d5f94e21054ae2b9e0a757e54fb44f5 Mon Sep 17 00:00:00 2001 From: Jacob Champion Date: Tue, 16 Dec 2025 09:30:55 +0100 Subject: [PATCH 06/36] WIP: pytest: Add some SSL client tests This is a sample client-only test suite. It tests some handshake failures against a mock server, as well as a full SSL handshake + empty query + response. pyca/cryptography is added as a new package dependency. Certificates for testing are generated on the fly. The mock design is threaded: the server socket is listening on a background thread, and the test provides the server logic via a callback. There is some additional work still needed to make this production-ready; see the notes for _TCPServer.background(). (Currently, an exception in the wrong place could result in a hang-until-timeout rather than an immediate failure.) TODOs: - local_server and tcp_server_class are nearly identical and should share code. 
- fix exception-related timeouts for .background() - figure out the proper use of "session" vs "module" scope - ensure that pq.libpq unwinds (to close connections) before tcp_server; see comment in test_server_with_ssl_disabled() --- pyproject.toml | 8 + src/test/pytest/pyt/test_libpq.py | 145 ++++++++++++++++ src/test/ssl/Makefile | 2 + src/test/ssl/meson.build | 6 + src/test/ssl/pyt/conftest.py | 128 ++++++++++++++ src/test/ssl/pyt/test_client.py | 280 ++++++++++++++++++++++++++++++ 6 files changed, 569 insertions(+) create mode 100644 src/test/ssl/pyt/conftest.py create mode 100644 src/test/ssl/pyt/test_client.py diff --git a/pyproject.toml b/pyproject.toml index 4628d2274e010..00c8ae885831a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -12,6 +12,14 @@ dependencies = [ # Any other dependencies are effectively optional (added below). We import # these libraries using pytest.importorskip(). So tests will be skipped if # they are not available. + + # Notes on the cryptography package: + # - 3.3.2 is shipped on Debian bullseye. + # - 3.4.x drops support for Python 2, making it a version of note for older LTS + # distros. + # - 35.x switched versioning schemes and moved to Rust parsing. + # - 40.x is the last version supporting Python 3.6. + "cryptography >= 3.3.2", ] [tool.pytest.ini_options] diff --git a/src/test/pytest/pyt/test_libpq.py b/src/test/pytest/pyt/test_libpq.py index 1d0d9bc3b94cc..49726f579e231 100644 --- a/src/test/pytest/pyt/test_libpq.py +++ b/src/test/pytest/pyt/test_libpq.py @@ -33,3 +33,148 @@ def test_must_connect_errors(connect): """Tests that connect() raises LibpqError.""" with pytest.raises(LibpqError, match="invalid connection option"): connect(some_unknown_keyword="whatever") + + +@pytest.fixture +def local_server(sockdir, remaining_timeout): + """ + Opens up a local UNIX socket for mocking a Postgres server on a background + thread. See the _Server API for usage. 
+ + This fixture requires AF_UNIX support; dependent tests will be skipped on + platforms that don't provide it. + """ + + try: + from socket import AF_UNIX + except ImportError: + pytest.skip("AF_UNIX not supported on this platform") + + class _Server(contextlib.ExitStack): + """ + Implementation class for local_server. See .background() for the primary + entry point for tests. Postgres clients may connect to this server via + local_server.host/local_server.port. + + _Server derives from contextlib.ExitStack to provide easy cleanup of + associated resources; see the documentation for that class for a full + explanation. + """ + + def __init__(self): + super().__init__() + + self.host = sockdir + + # Get a free port number from the OS to avoid collisions. + with socket.socket() as s: + s.bind(("127.0.0.1", 0)) + self.port = s.getsockname()[1] + + self._thread = None + self._thread_exc = None + self._listener = self.enter_context( + socket.socket(AF_UNIX, socket.SOCK_STREAM), + ) + + def bind_and_listen(self): + """ + Does the actual work of binding the UNIX socket using the Postgres + server conventions and listening for connections. + + The listen backlog is currently hardcoded to one. + """ + sockfile = self.host / ".s.PGSQL.{}".format(self.port) + + # Lock down the permissions on the new socket. + prev_mask = os.umask(0o077) + + # Bind (creating the socket file), and immediately register it for + # deletion from disk when the stack is cleaned up. + self._listener.bind(bytes(sockfile)) + self.callback(os.unlink, sockfile) + + os.umask(prev_mask) + + self._listener.listen(1) + + def background(self, fn: Callable[[socket.socket], None]) -> None: + """ + Accepts a client connection on a background thread and passes it to + the provided callback. Any exceptions raised from the callback will + be re-raised on the main thread during fixture teardown. 
+ + Blocking operations on the connected socket default to using the + remaining_timeout(), though this can be changed by the test via the + socket's .settimeout(). + """ + + def _bg(): + try: + self._listener.settimeout(remaining_timeout()) + sock, _ = self._listener.accept() + + with sock: + sock.settimeout(remaining_timeout()) + fn(sock) + + except Exception as e: + # Save the exception for re-raising on the main thread. + self._thread_exc = e + + # TODO: rather than using callback(), consider explicitly signaling + # the fn() implementation to stop early if we get an exception. + # Otherwise we'll hang until the end of the timeout. + self._thread = threading.Thread(target=_bg) + self.callback(self._join) + + self._thread.start() + + def _join(self): + """ + Waits for the background thread to finish and raises any thrown + exception. This is called during fixture teardown. + """ + # Give a little bit of wiggle room on the join timeout, since we're + # racing against the test's own use of remaining_timeout(). (It's + # preferable to let tests report timeouts; the stack traces will + # help with debugging.) + self._thread.join(remaining_timeout() + 1) + if self._thread.is_alive(): + raise TimeoutError("background thread is still running after timeout") + + if self._thread_exc is not None: + raise self._thread_exc + + with _Server() as s: + s.bind_and_listen() + yield s + + +def test_connection_is_finished_on_error(connect, local_server): + """Tests that PQfinish() gets called at the end of testing.""" + expected_error = "something is wrong" + + def serve_error(s: socket.socket) -> None: + pktlen = struct.unpack("!I", s.recv(4))[0] + + # Quick check for the startup packet version. + version = struct.unpack("!HH", s.recv(4)) + assert version == (3, 2) + + # Discard the remainder of the startup packet and send a v2 error. + s.recv(pktlen - 8) + s.send(b"E" + expected_error.encode() + b"\0") + + # And now the socket should be closed. 
+ assert not s.recv(1), "client sent unexpected data" + + local_server.background(serve_error) + + with pytest.raises(LibpqError, match=expected_error): + # Exiting this context should result in PQfinish(). + connect( + host=local_server.host, + port=local_server.port, + max_protocol_version="3.2", # Don't use grease + ) diff --git a/src/test/ssl/Makefile b/src/test/ssl/Makefile index aa062945fb9f3..287729ad9fb97 100644 --- a/src/test/ssl/Makefile +++ b/src/test/ssl/Makefile @@ -30,6 +30,8 @@ clean distclean: # Doesn't depend on sslfiles because we don't rebuild them by default check: $(prove_check) + # XXX these suites should run independently, not serially + $(pytest_check) installcheck: $(prove_installcheck) diff --git a/src/test/ssl/meson.build b/src/test/ssl/meson.build index d7e7ce23433ed..922d9cee0bec2 100644 --- a/src/test/ssl/meson.build +++ b/src/test/ssl/meson.build @@ -16,4 +16,10 @@ tests += { 't/004_sni.pl', ], }, + 'pytest': { + 'tests': [ + 'pyt/test_client.py', + 'pyt/test_server.py', + ], + }, } diff --git a/src/test/ssl/pyt/conftest.py b/src/test/ssl/pyt/conftest.py new file mode 100644 index 0000000000000..870f738ac44df --- /dev/null +++ b/src/test/ssl/pyt/conftest.py @@ -0,0 +1,128 @@ +# Copyright (c) 2025, PostgreSQL Global Development Group + +import datetime +import re +import subprocess +import tempfile +from collections import namedtuple + +import pytest + + +@pytest.fixture(scope="session") +def cryptography(): + return pytest.importorskip("cryptography", "3.3.2") + + +Cert = namedtuple("Cert", "cert, certpath, key, keypath") + + +@pytest.fixture(scope="session") +def certs(cryptography, tmp_path_factory): + """ + Caches commonly used certificates at the session level, and provides a way + to create new ones. 
+ + - certs.ca: the root CA certificate + + - certs.server: the "standard" server certificate, signed by certs.ca + + - certs.server_host: the hostname of the certs.server certificate + + - certs.new(): creates a custom certificate, signed by certs.ca + """ + + from cryptography import x509 + from cryptography.hazmat.primitives import hashes, serialization + from cryptography.hazmat.primitives.asymmetric import rsa + from cryptography.x509.oid import NameOID + + tmpdir = tmp_path_factory.mktemp("test-certs") + + class _Certs: + def __init__(self): + self.ca = self.new( + x509.Name( + [x509.NameAttribute(NameOID.COMMON_NAME, "PG pytest CA")], + ), + ca=True, + ) + + self.server_host = "example.org" + self.server = self.new( + x509.Name( + [x509.NameAttribute(NameOID.COMMON_NAME, self.server_host)], + ) + ) + + def new(self, subject: x509.Name, *, ca=False) -> Cert: + """ + Creates and signs a new Cert with the given subject name. If ca is + True, the certificate will be self-signed; otherwise the certificate + is signed by self.ca. + """ + key = rsa.generate_private_key( + public_exponent=65537, + key_size=2048, + ) + + builder = x509.CertificateBuilder() + now = datetime.datetime.now(datetime.timezone.utc) + + builder = ( + builder.subject_name(subject) + .public_key(key.public_key()) + .serial_number(x509.random_serial_number()) + .not_valid_before(now) + .not_valid_after(now + datetime.timedelta(hours=1)) + ) + + if ca: + builder = builder.issuer_name(subject) + else: + builder = builder.issuer_name(self.ca.cert.subject) + + builder = builder.add_extension( + x509.BasicConstraints(ca=ca, path_length=None), + critical=True, + ) + + cert = builder.sign( + private_key=key if ca else self.ca.key, + algorithm=hashes.SHA256(), + ) + + # Dump the certificate and key to file.
+ keypath = self._tofile( + key.private_bytes( + serialization.Encoding.PEM, + serialization.PrivateFormat.PKCS8, + serialization.NoEncryption(), + ), + suffix=".key", + ) + certpath = self._tofile( + cert.public_bytes(serialization.Encoding.PEM), + suffix="-ca.crt" if ca else ".crt", + ) + + return Cert( + cert=cert, + certpath=certpath, + key=key, + keypath=keypath, + ) + + def _tofile(self, data: bytes, *, suffix) -> str: + """ + Dumps data to a file on disk with the requested suffix and returns + the path. The file is located somewhere in pytest's temporary + directory root. + """ + f = tempfile.NamedTemporaryFile(suffix=suffix, dir=tmpdir, delete=False) + with f: + f.write(data) + + return f.name + + return _Certs() diff --git a/src/test/ssl/pyt/test_client.py b/src/test/ssl/pyt/test_client.py new file mode 100644 index 0000000000000..4113dd21752a6 --- /dev/null +++ b/src/test/ssl/pyt/test_client.py @@ -0,0 +1,280 @@ +# Copyright (c) 2025, PostgreSQL Global Development Group + +import contextlib +import ctypes +import socket +import ssl +import struct +import threading +from typing import Callable + +import pytest + +import pypg +from libpq import LibpqError, ExecStatus + +# This suite opens up local TCP ports and is hidden behind PG_TEST_EXTRA=ssl. +pytestmark = pypg.require_test_extras("ssl") + + +@pytest.fixture(scope="session", autouse=True) +def skip_if_no_ssl_support(libpq_handle): + """Skips tests if SSL support is not configured.""" + + # Declare PQsslAttribute(). + PQsslAttribute = libpq_handle.PQsslAttribute + PQsslAttribute.restype = ctypes.c_char_p + PQsslAttribute.argtypes = [ctypes.c_void_p, ctypes.c_char_p] + + if not PQsslAttribute(None, b"library"): + pytest.skip("requires SSL support to be configured") + + +# +# Test Fixtures +# + + +@pytest.fixture +def tcp_server_class(remaining_timeout): + """ + Metafixture to combine related logic for tcp_server and ssl_server. 
+ + TODO: combine with test_libpq.local_server + """ + + class _TCPServer(contextlib.ExitStack): + """ + Implementation class for tcp_server. See .background() for the primary + entry point for tests. Postgres clients may connect to this server via + **tcp_server.conninfo. + + _TCPServer derives from contextlib.ExitStack to provide easy cleanup of + associated resources; see the documentation for that class for a full + explanation. + """ + + def __init__(self): + super().__init__() + + self._thread = None + self._thread_exc = None + self._listener = self.enter_context( + socket.socket(socket.AF_INET, socket.SOCK_STREAM), + ) + + self._bind_and_listen() + sockname = self._listener.getsockname() + self.conninfo = dict( + hostaddr=sockname[0], + port=sockname[1], + ) + + def _bind_and_listen(self): + """ + Does the actual work of binding the socket and listening for + connections. + + The listen backlog is currently hardcoded to one. + """ + self._listener.bind(("127.0.0.1", 0)) + self._listener.listen(1) + + def background(self, fn: Callable[[socket.socket], None]) -> None: + """ + Accepts a client connection on a background thread and passes it to + the provided callback. Any exceptions raised from the callback will + be re-raised on the main thread during fixture teardown. + + Blocking operations on the connected socket default to using the + remaining_timeout(), though this can be changed by the test via the + socket's .settimeout(). + """ + + def _bg(): + try: + self._listener.settimeout(remaining_timeout()) + sock, _ = self._listener.accept() + + with sock: + sock.settimeout(remaining_timeout()) + fn(sock) + + except Exception as e: + # Save the exception for re-raising on the main thread. + self._thread_exc = e + + # TODO: rather than using callback(), consider explicitly signaling + # the fn() implementation to stop early if we get an exception. + # Otherwise we'll hang until the end of the timeout. 
+ self._thread = threading.Thread(target=_bg) + self.callback(self._join) + + self._thread.start() + + def _join(self): + """ + Waits for the background thread to finish and raises any thrown + exception. This is called during fixture teardown. + """ + # Give a little bit of wiggle room on the join timeout, since we're + # racing against the test's own use of remaining_timeout(). (It's + # preferable to let tests report timeouts; the stack traces will + # help with debugging.) + self._thread.join(remaining_timeout() + 1) + if self._thread.is_alive(): + raise TimeoutError("background thread is still running after timeout") + + if self._thread_exc is not None: + raise self._thread_exc + + return _TCPServer + + +@pytest.fixture +def tcp_server(tcp_server_class): + """ + Opens up a local TCP socket for mocking a Postgres server on a background + thread. See the _TCPServer API for usage. + """ + with tcp_server_class() as s: + yield s + + +@pytest.fixture +def ssl_server(tcp_server_class, certs): + """ + Like tcp_server, but with an additional .background_ssl() method which will + perform an SSLRequest handshake on the socket before handing the connection + to the test callback. + + This server uses certs.server as its identity. + """ + + class _SSLServer(tcp_server_class): + def __init__(self): + super().__init__() + + self.conninfo["host"] = certs.server_host + + self._ctx = ssl.SSLContext(ssl.PROTOCOL_TLS_SERVER) + self._ctx.load_cert_chain(certs.server.certpath, certs.server.keypath) + + def background_ssl(self, fn: Callable[[ssl.SSLSocket], None]) -> None: + """ + Invokes a server callback as with .background(), but an SSLRequest + handshake is performed first, and the socket provided to the + callback has been wrapped in an OpenSSL layer. + """ + + def handshake(s: socket.socket): + pktlen = struct.unpack("!I", s.recv(4))[0] + + # Make sure we get an SSLRequest.
+ version = struct.unpack("!HH", s.recv(4)) + assert version == (1234, 5679) + assert pktlen == 8 + + # Accept the SSLRequest. + s.send(b"S") + + with self._ctx.wrap_socket(s, server_side=True) as wrapped: + fn(wrapped) + + self.background(handshake) + + with _SSLServer() as s: + yield s + + +# +# Tests +# + + +@pytest.mark.parametrize("sslmode", ("require", "verify-ca", "verify-full")) +def test_server_with_ssl_disabled(connect, tcp_server, certs, sslmode): + """ + Make sure client refuses to talk to non-SSL servers with stricter + sslmodes. + """ + + def refuse_ssl(s: socket.socket): + pktlen = struct.unpack("!I", s.recv(4))[0] + + # Make sure we get an SSLRequest. + version = struct.unpack("!HH", s.recv(4)) + assert version == (1234, 5679) + assert pktlen == 8 + + # Refuse the SSLRequest. + s.send(b"N") + + # Wait for the client to close the connection. + assert not s.recv(1), "client sent unexpected data" + + tcp_server.background(refuse_ssl) + + with pytest.raises(LibpqError, match="server does not support SSL"): + connect( + **tcp_server.conninfo, + sslrootcert=certs.ca.certpath, + sslmode=sslmode, + max_protocol_version="3.2", # Don't use grease + ) + + +def test_verify_full_connection(connect, ssl_server, certs): + """Completes a verify-full connection and empty query.""" + + def handle_empty_query(s: ssl.SSLSocket): + pktlen = struct.unpack("!I", s.recv(4))[0] + + # Check the startup packet version, then discard the remainder. + version = struct.unpack("!HH", s.recv(4)) + assert version == (3, 2) + s.recv(pktlen - 8) + + # Send the required litany of server messages. 
+ s.send(struct.pack("!cII", b"R", 8, 0)) # AuthenticationOK + + # ParameterStatus: client_encoding + key = b"client_encoding\0" + val = b"UTF-8\0" + s.send(struct.pack("!cI", b"S", 4 + len(key) + len(val)) + key + val) + + # ParameterStatus: DateStyle + key = b"DateStyle\0" + val = b"ISO, MDY\0" + s.send(struct.pack("!cI", b"S", 4 + len(key) + len(val)) + key + val) + + s.send(struct.pack("!cIII", b"K", 12, 1234, 1234)) # BackendKeyData + s.send(struct.pack("!cIc", b"Z", 5, b"I")) # ReadyForQuery + + # Expect an empty query. + pkttype = s.recv(1) + assert pkttype == b"Q" + pktlen = struct.unpack("!I", s.recv(4))[0] + assert s.recv(pktlen - 4) == b"\0" + + # Send an EmptyQueryResponse+ReadyForQuery. + s.send(struct.pack("!cI", b"I", 4)) + s.send(struct.pack("!cIc", b"Z", 5, b"I")) + + # libpq should terminate and close the connection. + assert s.recv(1) == b"X" + pktlen = struct.unpack("!I", s.recv(4))[0] + assert pktlen == 4 + + assert not s.recv(1), "client sent unexpected data" + + ssl_server.background_ssl(handle_empty_query) + + conn = connect( + **ssl_server.conninfo, + sslrootcert=certs.ca.certpath, + sslmode="verify-full", + max_protocol_version="3.2", # Don't use grease + ) + with conn: + assert conn.exec("").status() == ExecStatus.PGRES_EMPTY_QUERY From 01a5b80e0e157a9f592858e451bb9ae82810429f Mon Sep 17 00:00:00 2001 From: Jacob Champion Date: Tue, 16 Dec 2025 09:31:46 +0100 Subject: [PATCH 07/36] WIP: pytest: Add some server-side SSL tests In the same vein as the previous commit, this is a server-only test suite operating against a mock client. The test itself is a heavily parameterized check for direct-SSL handshake behavior, using a combination of "standard" and "custom" certificates via the certs fixture. installcheck is currently unsupported, but the architecture has some extension points that should make it possible later. For now, a new server is always started for the test session. 
TODOs: - improve remaining_timeout() integration with socket operations; at the moment, the timeout resets on every call rather than decrementing --- src/test/ssl/pyt/conftest.py | 50 ++++++++++ src/test/ssl/pyt/test_server.py | 161 ++++++++++++++++++++++++++++++++ 2 files changed, 211 insertions(+) create mode 100644 src/test/ssl/pyt/test_server.py diff --git a/src/test/ssl/pyt/conftest.py b/src/test/ssl/pyt/conftest.py index 870f738ac44df..d121724800bfd 100644 --- a/src/test/ssl/pyt/conftest.py +++ b/src/test/ssl/pyt/conftest.py @@ -126,3 +126,53 @@ def _tofile(self, data: bytes, *, suffix) -> str: return f.name return _Certs() + + +@pytest.fixture(scope="module", autouse=True) +def ssl_setup(pg_server_module, certs, datadir): + """ + Sets up required server settings for all tests in this module. + """ + try: + with pg_server_module.restarting() as s: + s.conf.set( + ssl="on", + ssl_ca_file=certs.ca.certpath, + ssl_cert_file=certs.server.certpath, + ssl_key_file=certs.server.keypath, + ) + + # Reject by default. + s.hba.prepend("hostssl all all all reject") + + except subprocess.CalledProcessError: + # This is a decent place to skip if the server isn't set up for SSL. + logpath = datadir / "postgresql.log" + unsupported = re.compile("SSL is not supported") + + with open(logpath, "r") as log: + for line in log: + if unsupported.search(line): + pytest.skip("the server does not support SSL") + + # Some other error happened. + raise + + users = pg_server_module.create_users("ssl") + dbs = pg_server_module.create_dbs("ssl") + + return (users, dbs) + + +@pytest.fixture(scope="module") +def client_cert(ssl_setup, certs): + """ + Creates a Cert for the "ssl" user. 
+ """ + from cryptography import x509 + from cryptography.x509.oid import NameOID + + users, _ = ssl_setup + user = users["ssl"] + + return certs.new(x509.Name([x509.NameAttribute(NameOID.COMMON_NAME, user)])) diff --git a/src/test/ssl/pyt/test_server.py b/src/test/ssl/pyt/test_server.py new file mode 100644 index 0000000000000..d5cb14b6c9ac7 --- /dev/null +++ b/src/test/ssl/pyt/test_server.py @@ -0,0 +1,161 @@ +# Copyright (c) 2025, PostgreSQL Global Development Group + +import re +import socket +import ssl +import struct + +import pytest + +import pypg + +# This suite opens up local TCP ports and is hidden behind PG_TEST_EXTRA=ssl. +pytestmark = pypg.require_test_extras("ssl") + +# For use with the `creds` parameter below. +CLIENT = "client" +SERVER = "server" + + +# fmt: off +@pytest.mark.parametrize( + "auth_method, creds, expected_error", +[ + # Trust allows anything. + ("trust", None, None), + ("trust", CLIENT, None), + ("trust", SERVER, None), + + # verify-ca allows any CA-signed certificate. + ("trust clientcert=verify-ca", None, "requires a valid client certificate"), + ("trust clientcert=verify-ca", CLIENT, None), + ("trust clientcert=verify-ca", SERVER, None), + + # cert and verify-full allow only the correct certificate. + ("trust clientcert=verify-full", None, "requires a valid client certificate"), + ("trust clientcert=verify-full", CLIENT, None), + ("trust clientcert=verify-full", SERVER, "authentication failed for user"), + ("cert", None, "requires a valid client certificate"), + ("cert", CLIENT, None), + ("cert", SERVER, "authentication failed for user"), +], +) +# fmt: on +def test_direct_ssl_certificate_authentication( + pg, + ssl_setup, + certs, + client_cert, + remaining_timeout, + # test parameters + auth_method, + creds, + expected_error, +): + """ + Tests direct SSL connections with various client-certificate/HBA + combinations. + """ + + # Set up the HBA as desired by the test. 
+ users, dbs = ssl_setup + + user = users["ssl"] + db = dbs["ssl"] + + with pg.reloading() as s: + s.hba.prepend( + ["hostssl", db, user, "127.0.0.1/32", auth_method], + ["hostssl", db, user, "::1/128", auth_method], + ) + + # Configure the SSL settings for the client. + ctx = ssl.SSLContext(ssl.PROTOCOL_TLS_CLIENT) + ctx.load_verify_locations(cafile=certs.ca.certpath) + ctx.set_alpn_protocols(["postgresql"]) # for direct SSL + + # Load up a client certificate if required by the test. + if creds == CLIENT: + ctx.load_cert_chain(client_cert.certpath, client_cert.keypath) + elif creds == SERVER: + # Using a server certificate as the client credential is expected to + # work only for clientcert=verify-ca (and `trust`, naturally). + ctx.load_cert_chain(certs.server.certpath, certs.server.keypath) + + # Make a direct SSL connection. There's no SSLRequest in the handshake; we + # simply wrap a TCP connection with OpenSSL. + addr = (pg.hostaddr, pg.port) + with socket.create_connection(addr) as s: + s.settimeout(remaining_timeout()) # XXX this resets every operation + + with ctx.wrap_socket(s, server_hostname=certs.server_host) as conn: + # Build and send the startup packet. + startup_options = dict( + user=user, + database=db, + application_name="pytest", + ) + + payload = b"" + for k, v in startup_options.items(): + payload += k.encode() + b"\0" + payload += str(v).encode() + b"\0" + payload += b"\0" # null terminator + + pktlen = 4 + 4 + len(payload) + conn.send(struct.pack("!IHH", pktlen, 3, 0) + payload) + + if not expected_error: + # Expect an AuthenticationOK to come back. + pkttype, pktlen = struct.unpack("!cI", conn.recv(5)) + assert pkttype == b"R" + assert pktlen == 8 + + authn_result = struct.unpack("!I", conn.recv(4))[0] + assert authn_result == 0 + + # Read and discard to ReadyForQuery. 
+ while True: + pkttype, pktlen = struct.unpack("!cI", conn.recv(5)) + payload = conn.recv(pktlen - 4) + + if pkttype == b"Z": + assert payload == b"I" + break + + # Send an empty query. + conn.send(struct.pack("!cI", b"Q", 5) + b"\0") + + # Expect EmptyQueryResponse+ReadyForQuery. + pkttype, pktlen = struct.unpack("!cI", conn.recv(5)) + assert pkttype == b"I" + assert pktlen == 4 + + pkttype, pktlen = struct.unpack("!cI", conn.recv(5)) + assert pkttype == b"Z" + + payload = conn.recv(pktlen - 4) + assert payload == b"I" + + else: + # Match the expected authentication error. + pkttype, pktlen = struct.unpack("!cI", conn.recv(5)) + assert pkttype == b"E" + + payload = conn.recv(pktlen - 4) + msg = None + + for component in payload.split(b"\0"): + if not component: + break # end of message + + key, val = component[:1], component[1:] + if key == b"S": + assert val == b"FATAL" + elif key == b"M": + msg = val.decode() + + assert re.search(expected_error, msg), "server error did not match" + + # Terminate. + conn.send(struct.pack("!cI", b"X", 4)) From b7fa1ad0df8bb32a7fe3aa52d086eac49ea24098 Mon Sep 17 00:00:00 2001 From: Greg Burd Date: Sun, 14 Jun 2026 11:16:31 -0400 Subject: [PATCH 08/36] pytest: add black/mypy/pylint + uv dev tooling Add the project-local dev toolchain (black, mypy, pylint configured in pyproject.toml) and uv-managed virtualenv wiring so the pytest suite is formatted, type-checked, and lint-clean for both Nix and non-Nix developers. Co-authored-by: Greg Burd --- pyproject.toml | 108 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 108 insertions(+) diff --git a/pyproject.toml b/pyproject.toml index 00c8ae885831a..fcd6b6a61633a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -22,6 +22,44 @@ dependencies = [ "cryptography >= 3.3.2", ] +# Optional high-level drivers. These are import-skipped at runtime, so tests +# that want them are skipped when absent. 
The in-tree libpq ctypes wrapper is +# the default connection path; psycopg3 is the default real driver and asyncpg +# the alternative, selected via PG_PYTEST_DRIVER. +[project.optional-dependencies] +drivers = [ + "psycopg[binary] >= 3.1", + "asyncpg >= 0.28", +] + +# Optional realistic fake-data generation (names, emails, addresses, +# sentences). Soft-imported by pypg.fake: when absent, pypg.faker() returns +# None after a single warning and pypg.meaningful_text() falls back to the +# random rand_str. rand_str itself never needs this. +fake = [ + "faker >= 20", +] + +# Dev tooling, managed project-locally by uv (uv sync --group dev). Kept out of +# the runtime dependencies so the suite stays lean; only needed to +# lint/format/type-check, never to run the tests. +[dependency-groups] +dev = [ + "black >= 24, < 26", + "mypy >= 1.8, < 2", + "pylint >= 3, < 4", + "pyrefly >= 1, < 2", +] + +[tool.uv] +# This pyproject is not itself a distributable package; uv just manages the +# project-local .venv. (pip ignores [tool.uv], so non-uv devs are unaffected.) +package = false +# The runtime suite supports Python >= 3.6 (run via system pytest on old +# distros, never via uv). Dev tooling targets modern Python, so scope uv's +# resolution to the dev interpreter range instead of the full requires-python. +environments = ["python_full_version >= '3.9'"] + [tool.pytest.ini_options] minversion = "7.0" @@ -30,3 +68,73 @@ pythonpath = ["src/test/pytest"] # Load the shared fixtures plugin addopts = ["-p", "pypg.fixtures"] + +[tool.black] +# black's default line length; pylint is aligned to it below. +line-length = 88 + +[tool.mypy] +# No explicit python_version: type-check against the dev interpreter. (The +# runtime suite still targets >= 3.6, but pytest itself now requires 3.10+ +# syntax, so pinning an old version makes mypy choke on followed library code.) 
+# The suite leans on ctypes/libpq and third-party libs without stubs; be +# pragmatic rather than strict so existing code stays clean while still +# catching real type errors in our own helpers. +ignore_missing_imports = true +follow_imports = "silent" +warn_unused_ignores = true +warn_redundant_casts = true +no_implicit_optional = true +# Test trees have many same-named modules (conftest.py, test_*.py) that are not +# importable packages; these settings let mypy map files to distinct modules. +namespace_packages = true +explicit_package_bases = true + +[tool.pyrefly] +# Pyrefly (https://pyrefly.org) is a fast type checker run as an extra gate. +# The in-tree infra lives here, so it can resolve `pypg`/`libpq` imports +# without relying on PYTHONPATH. +search-path = ["src/test/pytest"] + +[tool.pylint.main] +py-version = "3.9" + +[tool.pylint.format] +max-line-length = 88 +# PostgresServer is a large facade mirroring PostgreSQL::Test::Cluster (~3000 +# lines of Perl); allow it room rather than splitting it artificially. +max-module-lines = 2000 + +[tool.pylint."messages control"] +# Curated relaxations for pytest/ctypes idioms (not bug-hiding). The checks that +# matter for a cross-platform suite (encoding, broad-except, with-resources, +# real errors) stay ENABLED and are satisfied in code, not silenced here. +disable = [ + "redefined-outer-name", # pytest passes fixtures by name on purpose + "missing-module-docstring", + "missing-function-docstring", # tests and plugin hooks self-describe + "too-few-public-methods", + "duplicate-code", # parallel tests legitimately share shape + "import-outside-toplevel", # required by the importorskip() pattern + "unused-argument", # pytest hook/fixture signatures are fixed + "wrong-import-order", # in-tree pypg/libpq misread as third-party + "fixme", # TODO/XXX are tracked intentionally + "use-dict-literal", # dict(opt=...) 
reads well for conn options + "consider-using-f-string", +] + +[tool.pylint.design] +# Advisory complexity metrics. A server-management harness and a ctypes/error +# wrapper are inherently "wide" (many attributes/args/branches); the Perl +# equivalents are larger still. Complexity is managed by review, not a hard +# pylint threshold, so these refactor-nudges are relaxed rather than silenced +# bug checks. +max-args = 22 +max-positional-arguments = 10 +max-attributes = 20 +max-locals = 26 +max-branches = 15 +max-statements = 65 +# PostgresServer is a facade mirroring the large PostgreSQL::Test::Cluster +# module; a wide method surface is expected and intended. +max-public-methods = 90 From 6d788aeef89c7849c1eaf969701fa30520f3bb59 Mon Sep 17 00:00:00 2001 From: Greg Burd Date: Sun, 14 Jun 2026 11:16:31 -0400 Subject: [PATCH 09/36] pytest: build out the pypg PostgreSQL::Test parity layer Extend the pypg framework (server, fixtures, command, util, bgpsql, interactive, rewind, sysv_shm, kerberos helpers, libpq ctypes layer, pgtap plugin) with the PostgreSQL::Test::Cluster / ::Utils primitives the ported suites need: psql-based safe_psql/poll_query_until (Perl-faithful per-statement autocommit), background/interactive psql sessions, replication helpers (wait_for_catchup, wait_for_subscription_sync, slot helpers), WAL emit/advance helpers, archiving/restoring, command_* assertions, and the infra self-tests. Cross-platform hardening (with Andrew Dunstan): portable backend signals via pg_ctl kill, character-based log offsets that fold CRLF on Windows, file-based command-output capture to avoid the pg_ctl-start pipe deadlock, server-log reporting on start failure, portable WAL archive/restore copy commands, the ASan LD_PRELOAD / libpq ABI-mismatch skip, and the pgtap TESTLOGDIR guard. 
Co-authored-by: Andrew Dunstan Co-authored-by: Greg Burd --- src/test/pytest/libpq/__init__.py | 2 + src/test/pytest/libpq/_core.py | 89 +- src/test/pytest/meson.build | 1 + src/test/pytest/pgtap.py | 15 +- src/test/pytest/pypg/__init__.py | 38 +- src/test/pytest/pypg/_env.py | 6 +- src/test/pytest/pypg/bgpsql.py | 247 +++++ src/test/pytest/pypg/command.py | 247 +++++ src/test/pytest/pypg/fake.py | 103 ++ src/test/pytest/pypg/fixtures.py | 59 +- src/test/pytest/pypg/interactive.py | 112 ++ src/test/pytest/pypg/kerberos.py | 337 ++++++ src/test/pytest/pypg/rewind.py | 214 ++++ src/test/pytest/pypg/server.py | 1569 ++++++++++++++++++++++++++- src/test/pytest/pypg/sysv_shm.py | 52 + src/test/pytest/pypg/util.py | 223 +++- src/test/pytest/pyt/test_fake.py | 51 + src/test/pytest/pyt/test_libpq.py | 3 +- 18 files changed, 3287 insertions(+), 81 deletions(-) create mode 100644 src/test/pytest/pypg/bgpsql.py create mode 100644 src/test/pytest/pypg/command.py create mode 100644 src/test/pytest/pypg/fake.py create mode 100644 src/test/pytest/pypg/interactive.py create mode 100644 src/test/pytest/pypg/kerberos.py create mode 100644 src/test/pytest/pypg/rewind.py create mode 100644 src/test/pytest/pypg/sysv_shm.py create mode 100644 src/test/pytest/pyt/test_fake.py diff --git a/src/test/pytest/libpq/__init__.py b/src/test/pytest/libpq/__init__.py index 6a71ebbe43f03..94554fee91f1c 100644 --- a/src/test/pytest/libpq/__init__.py +++ b/src/test/pytest/libpq/__init__.py @@ -17,6 +17,7 @@ connect, connstr, load_libpq_handle, + libpq_abi_skip_reason, register_type_info, ) @@ -31,5 +32,6 @@ "connect", "connstr", "load_libpq_handle", + "libpq_abi_skip_reason", "register_type_info", ] diff --git a/src/test/pytest/libpq/_core.py b/src/test/pytest/libpq/_core.py index 1c059b9b44657..b99661aecff17 100644 --- a/src/test/pytest/libpq/_core.py +++ b/src/test/pytest/libpq/_core.py @@ -13,13 +13,15 @@ import platform import os import uuid -from typing import Any, Callable, Dict, Optional +from 
typing import Any, Callable, Dict, NoReturn, Optional from .errors import LibpqError # PG_DIAG field identifiers from postgres_ext.h class DiagField(enum.IntEnum): + """PG_DIAG_* field identifiers used with PQresultErrorField().""" + SEVERITY = ord("S") SEVERITY_NONLOCALIZED = ord("V") SQLSTATE = ord("C") @@ -76,6 +78,63 @@ class _PGresult(ctypes.Structure): _PGresult_p = ctypes.POINTER(_PGresult) +def _libpq_path(libdir, bindir): + """Return the platform-specific full path to libpq for this build.""" + system = platform.system() + if system in ("Linux", "FreeBSD", "NetBSD", "OpenBSD"): + # On Windows, libpq.dll is confusingly in bindir, not libdir. + return os.path.join(libdir, "libpq.so.5") + if system == "Darwin": + return os.path.join(libdir, "libpq.5.dylib") + if system == "Windows": + return os.path.join(bindir, "libpq.dll") + raise AssertionError("the libpq fixture must be updated for {}".format(system)) + + +def _elf_class(path): + """Return 1 (ELFCLASS32), 2 (ELFCLASS64), or None if path is not ELF.""" + try: + with open(path, "rb") as fh: + ident = fh.read(5) + except OSError: + return None + if ident[:4] != b"\x7fELF": + return None + return ident[4] # e_ident[EI_CLASS]: 1 = 32-bit, 2 = 64-bit + + +def libpq_abi_skip_reason(libdir, bindir): + """Return a reason to skip if this Python cannot load the build's libpq. + + The framework loads libpq in-process via ctypes, so the interpreter and the + library must share an ABI. The common mismatch is a 64-bit Python against a + 32-bit libpq (meson's -m32 build), which otherwise fails every test with + OSError: wrong ELF class. Detect it by reading the library's ELF header + rather than dlopen()ing it -- a trial dlopen of an ASan-instrumented libpq + would abort the process, not raise. Returns None when the ABI matches, when + libpq cannot be located, or when the file is not ELF (macOS/Windows). 
+ + Co-authored-by: Andrew Dunstan + """ + try: + path = _libpq_path(libdir, bindir) + except AssertionError: + return None + elf_class = _elf_class(path) + if elf_class is None: + return None + py_bits = ctypes.sizeof(ctypes.c_void_p) * 8 + lib_bits = 64 if elf_class == 2 else 32 + if py_bits != lib_bits: + return ( + "{py}-bit Python cannot load {lib}-bit libpq ({path}); the " + "in-process libpq framework needs a {lib}-bit interpreter".format( + py=py_bits, lib=lib_bits, path=path + ) + ) + return None + + def load_libpq_handle(libdir, bindir): """ Loads a ctypes handle for libpq. Some common function prototypes are @@ -189,7 +248,7 @@ def _parse_array(value: str, elem_oid: int): current_element.append(next_char) pos += 2 continue - elif char == '"': + if char == '"': in_quotes = False else: current_element.append(char) @@ -281,9 +340,10 @@ def simplify_query_results(results) -> Any: class PGresult(contextlib.AbstractContextManager): """Wraps a raw _PGresult_p with a more friendly interface.""" - def __init__(self, lib: ctypes.CDLL, res: _PGresult_p): + def __init__(self, lib: ctypes.CDLL, res: _PGresult_p): # type: ignore[valid-type] self._lib = lib - self._res = res + # Cleared to None on __exit__ once the result has been freed. + self._res: Optional[_PGresult_p] = res # type: ignore[valid-type] def __exit__(self, *exc): self._lib.PQclear(self._res) @@ -302,7 +362,7 @@ def _get_error_field(self, field: DiagField) -> Optional[str]: val = self._lib.PQresultErrorField(self._res, int(field)) return val.decode() if val else None - def raise_error(self) -> None: + def raise_error(self) -> NoReturn: """ Raises LibpqError with diagnostic information from the result. 
""" @@ -374,25 +434,26 @@ class PGconn(contextlib.AbstractContextManager): def __init__( self, lib: ctypes.CDLL, - handle: _PGconn_p, + handle: _PGconn_p, # type: ignore[valid-type] stack: contextlib.ExitStack, ): self._lib = lib - self._handle = handle + # Cleared to None on __exit__ once the connection has been finished. + self._handle: Optional[_PGconn_p] = handle # type: ignore[valid-type] self._stack = stack def __exit__(self, *exc): self._lib.PQfinish(self._handle) self._handle = None - def exec(self, query: str): + def exec(self, query: str) -> PGresult: """ Executes a query via PQexec() and returns a PGresult. """ res = self._lib.PQexec(self._handle, query.encode()) return self._stack.enter_context(PGresult(self._lib, res)) - def sql(self, query: str): + def sql(self, query: str): # pylint: disable=inconsistent-return-statements """ Executes a query and raises an exception if it fails. Returns the query results with automatic type conversion and simplification. @@ -409,15 +470,13 @@ def sql(self, query: str): res = self.exec(query) status = res.status() - if status == ExecStatus.PGRES_FATAL_ERROR: - res.raise_error() - elif status == ExecStatus.PGRES_COMMAND_OK: + if status == ExecStatus.PGRES_COMMAND_OK: return None - elif status == ExecStatus.PGRES_TUPLES_OK: + if status == ExecStatus.PGRES_TUPLES_OK: results = res.fetch_all() return simplify_query_results(results) - else: - res.raise_error() + # PGRES_FATAL_ERROR and anything else: raise (raise_error is NoReturn). 
+ res.raise_error() def connstr(opts: Dict[str, Any]) -> str: diff --git a/src/test/pytest/meson.build b/src/test/pytest/meson.build index b86be901e7c2e..17c3944bee6bd 100644 --- a/src/test/pytest/meson.build +++ b/src/test/pytest/meson.build @@ -11,6 +11,7 @@ tests += { 'pytest': { 'tests': [ 'pyt/test_errors.py', + 'pyt/test_fake.py', 'pyt/test_libpq.py', 'pyt/test_multi_server.py', 'pyt/test_query_helpers.py', diff --git a/src/test/pytest/pgtap.py b/src/test/pytest/pgtap.py index 2ae16b624d571..199888747563d 100644 --- a/src/test/pytest/pgtap.py +++ b/src/test/pytest/pgtap.py @@ -76,14 +76,23 @@ def pytest_configure(config): Hijacks the standard streams as soon as possible during pytest startup. The pytest-formatted output gets logged to file instead, and we'll use the original sys.__stdout__/__stderr__ streams for the TAP protocol. + + When TESTLOGDIR is unset (a standalone pytest run outside the meson/testwrap + harness) we leave the streams alone rather than failing, and creating the + log directory tolerates its prior existence -- the harness may have made it, + and an unconditional makedirs would raise FileExistsError (a pytest + INTERNALERROR, reported by meson as a generic ERROR rather than a test + result). 
""" logdir = os.getenv("TESTLOGDIR") if not logdir: - raise RuntimeError("pgtap requires the TESTLOGDIR envvar to be set") + return - os.makedirs(logdir) + os.makedirs(logdir, exist_ok=True) logpath = os.path.join(logdir, "pytest.log") - sys.stdout = sys.stderr = open(logpath, "a", buffering=1) + sys.stdout = sys.stderr = open( # pylint: disable=consider-using-with + logpath, "a", buffering=1, encoding="utf-8" + ) @pytest.hookimpl(trylast=True) diff --git a/src/test/pytest/pypg/__init__.py b/src/test/pytest/pypg/__init__.py index 4ee91289f7003..fed8a1ea9f999 100644 --- a/src/test/pytest/pypg/__init__.py +++ b/src/test/pytest/pypg/__init__.py @@ -1,10 +1,46 @@ # Copyright (c) 2025, PostgreSQL Global Development Group -from ._env import require_test_extras, skip_unless_test_extras +from ._env import ( + require_test_extras, + skip_unless_test_extras, + test_timeout_default, +) +from .command import CommandResult, PgBin +from .fake import faker, meaningful_text, rand_str +from .kerberos import KerberosServer from .server import PostgresServer +from .util import ( + wait_for_file, + compare_files, + check_pg_config, + scan_server_header, + append_to_file, + check_mode_recursive, + chmod_recursive, + get_free_port, + slurp_dir, + slurp_file, +) __all__ = [ "require_test_extras", "skip_unless_test_extras", + "test_timeout_default", + "faker", + "meaningful_text", + "rand_str", + "KerberosServer", "PostgresServer", + "PgBin", + "CommandResult", + "append_to_file", + "check_mode_recursive", + "chmod_recursive", + "get_free_port", + "slurp_file", + "slurp_dir", + "check_pg_config", + "scan_server_header", + "compare_files", + "wait_for_file", ] diff --git a/src/test/pytest/pypg/_env.py b/src/test/pytest/pypg/_env.py index c4087be32125f..02e91a686ef70 100644 --- a/src/test/pytest/pypg/_env.py +++ b/src/test/pytest/pypg/_env.py @@ -37,7 +37,7 @@ def test_some_ldap_feature(): pytestmark = pypg.require_test_extra("ssl", "kerberos") """ return pytest.mark.skipif( - not 
all([_has_test_extra(k) for k in keys]), + not all(_has_test_extra(k) for k in keys), reason=_test_extra_skip_reason(*keys), ) @@ -52,7 +52,7 @@ def my_fixture(): skip_unless_test_extras("ldap") ... """ - if not all([_has_test_extra(k) for k in keys]): + if not all(_has_test_extra(k) for k in keys): pytest.skip(_test_extra_skip_reason(*keys)) @@ -68,5 +68,5 @@ def test_timeout_default() -> int: try: return int(default) except ValueError as v: - logger.warning("PG_TEST_TIMEOUT_DEFAULT could not be parsed: " + str(v)) + logger.warning("PG_TEST_TIMEOUT_DEFAULT could not be parsed: %s", v) return 180 diff --git a/src/test/pytest/pypg/bgpsql.py b/src/test/pytest/pypg/bgpsql.py new file mode 100644 index 0000000000000..31855f00122bb --- /dev/null +++ b/src/test/pytest/pypg/bgpsql.py @@ -0,0 +1,247 @@ +# Copyright (c) 2025, PostgreSQL Global Development Group + +""" +An interactive psql session running in the background, mirroring +PostgreSQL::Test::BackgroundPsql. + +psql is run with `--file -`, reading from a pipe we keep open, so queries can be +fed incrementally. Two reader threads accumulate stdout/stderr into buffers that +the pump-until helpers poll for a pattern (the Python analog of IPC::Run::pump). 
+""" + +import re +import subprocess +import threading +import time +from typing import List, Optional + +from ._env import test_timeout_default + + +class BackgroundPsql: + """A long-lived psql session driven by feeding stdin and matching output.""" + + def __init__(self, cmd: List[str], env, timeout=None, wait=True): + self._cmd = cmd + self._env = env + self._timeout = timeout if timeout is not None else test_timeout_default() + self._proc: Optional[subprocess.Popen] = None + self._stdout = "" + self._stderr = "" + self._last_stderr = "" + self._lock = threading.Lock() + self._threads: List[threading.Thread] = [] + self._query_cnt = 1 + self._start() + if wait: + self.wait_connect() + + def _start(self): + # pylint: disable=consider-using-with # long-lived; closed in quit() + self._proc = subprocess.Popen( + self._cmd, + stdin=subprocess.PIPE, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + env=self._env, + encoding="utf-8", + errors="replace", + bufsize=1, + ) + self._stdout = "" + self._stderr = "" + self._threads = [ + threading.Thread(target=self._reader, args=("out",), daemon=True), + threading.Thread(target=self._reader, args=("err",), daemon=True), + ] + for thread in self._threads: + thread.start() + + def _reader(self, which): + assert self._proc is not None + stream = self._proc.stdout if which == "out" else self._proc.stderr + assert stream is not None + for line in iter(stream.readline, ""): + with self._lock: + if which == "out": + self._stdout += line + else: + self._stderr += line + + def _send(self, text): + assert self._proc is not None and self._proc.stdin is not None + self._proc.stdin.write(text) + self._proc.stdin.flush() + + def send(self, text): + """Feed raw text to psql's stdin without waiting for output.""" + self._send(text) + + def signal(self, sig): + """Send a signal (e.g. 
signal.SIGINT) to the psql process.""" + assert self._proc is not None + self._proc.send_signal(sig) + + @property + def stdout(self): + """The accumulated stdout captured so far. + + Mirrors Perl's ``$session->{stdout}``: lets a test inspect output that a + fire-and-forget statement (sent via send()/query_until()) leaves behind + once it finishes, e.g. the final result of a blocking WAIT FOR after the + session is quit(). + """ + with self._lock: + return self._stdout + + @property + def stderr(self): + """The accumulated stderr captured so far.""" + with self._lock: + return self._stderr + + @property + def last_stderr(self): + """The stderr produced by the most recent query()/query_safe(). + + Mirrors Perl's ``$session->{stderr}`` immediately after ``$session->query``: + query() captures the statement's stderr separately and clears the live + buffer, so a test that needs to assert on (or match a regex against) the + error text of the just-run statement reads it here. + """ + with self._lock: + return self._last_stderr + + def _pump_until(self, want_out=None, want_err=None): + rx_out = re.compile(want_out) if want_out else None + rx_err = re.compile(want_err) if want_err else None + deadline = time.monotonic() + self._timeout + # Poll with an adaptive backoff: a tight initial interval keeps + # per-query latency low for fast local statements (workloads that + # issue thousands of round-trips), backing off to a coarse interval so + # genuinely long waits don't busy-spin. 
+ interval = 0.0005 + while True: + with self._lock: + ok_out = rx_out is None or rx_out.search(self._stdout) + ok_err = rx_err is None or rx_err.search(self._stderr) + if ok_out and ok_err: + return + if time.monotonic() > deadline: + raise TimeoutError( + "background psql timed out waiting for " + "out={!r} err={!r}\nstdout:\n{}\nstderr:\n{}".format( + want_out, want_err, self._stdout, self._stderr + ) + ) + time.sleep(interval) + if interval < 0.02: + interval = min(interval * 2, 0.02) + + def wait_connect(self): + """Wait until psql is connected and ready to consume input.""" + banner = "background_psql: ready" + self._send("\\echo '{0}'\n\\warn '{0}'\n".format(banner)) + match = banner + r"\r?\n" + self._pump_until(want_out=match, want_err=match) + with self._lock: + self._stdout = "" + self._stderr = "" + + def query(self, query): + """Run query and return its output (waits for completion via a banner).""" + cnt = self._query_cnt + self._query_cnt += 1 + banner = "background_psql: QUERY_SEPARATOR {}:".format(cnt) + self._send("{q}\n;\n\\echo '{b}'\n\\warn '{b}'\n".format(q=query, b=banner)) + match = banner + r"\r?\n" + self._pump_until(want_out=match, want_err=match) + strip = r"\r?\n?" + re.escape(banner) + r"\r?\n" + with self._lock: + output = re.sub(strip, "", self._stdout) + self._last_stderr = re.sub(strip, "", self._stderr) + self._stderr = "" + self._stdout = "" + return output + + def query_safe(self, query): + """Run query and return its output, raising if psql reported an error. + + Mirrors PostgreSQL::Test::BackgroundPsql->query_safe: any ERROR/FATAL/ + PANIC on stderr from the statement is fatal to the test. + """ + output = self.query(query) + if re.search(r"^(?:ERROR|FATAL|PANIC):", self._last_stderr, re.MULTILINE): + raise RuntimeError( + "query_safe failed: {}\nquery was: {}".format( + self._last_stderr.strip(), query + ) + ) + return output + + def set_query_timer_restart(self): + """Reset the per-query timeout window. 
+ + Mirrors BackgroundPsql->set_query_timer_restart. pypg recomputes the + deadline at the start of every query/pump, so the Perl timer-restart + behaviour is already the default; this is a no-op kept for parity. + """ + + def query_until(self, until, query=""): + """Send query and pump stdout until the until regex appears; return it.""" + if query: + self._send(query) + self._pump_until(want_out=until) + with self._lock: + ret = self._stdout + self._stdout = "" + return ret + + def wait_for_stderr(self, until, query=""): + """Send query and pump stderr until the until regex appears.""" + if query: + self._send(query) + self._pump_until(want_err=until) + with self._lock: + self._stderr = "" + + def clear(self): + """Discard any accumulated stdout/stderr.""" + with self._lock: + self._stdout = "" + self._stderr = "" + + def quit(self): + """Close the session, returning the psql exit code.""" + if self._proc is None: + return None + try: + if self._proc.stdin and not self._proc.stdin.closed: + self._send("\\q\n") + self._proc.stdin.close() + except (BrokenPipeError, OSError): + pass + try: + self._proc.wait(timeout=self._timeout) + except subprocess.TimeoutExpired: + self._proc.kill() + for thread in self._threads: + thread.join(timeout=1) + return self._proc.returncode + + finish = quit + + def restart(self): + """Quit (if needed) and start a fresh psql session with the same params.""" + self.quit() + self._start() + self.wait_connect() + + def reconnect_and_clear(self): + """Restart the session and discard buffered output. + + Mirrors PostgreSQL::Test::BackgroundPsql->reconnect_and_clear: used after + a recovery conflict terminates the backend, to get a fresh connection. 
+ """ + self.restart() + self.clear() diff --git a/src/test/pytest/pypg/command.py b/src/test/pytest/pypg/command.py new file mode 100644 index 0000000000000..6a5fb6cff175f --- /dev/null +++ b/src/test/pytest/pypg/command.py @@ -0,0 +1,247 @@ +# Copyright (c) 2025, PostgreSQL Global Development Group + +""" +Helpers for running PostgreSQL programs and asserting on their results. + +These mirror the command_* and program_* helpers from PostgreSQL::Test::Utils, +so Perl TAP tests can be ported with equivalent assertions. Binaries are +resolved against a bindir (prepended to PATH) so tests work whether or not the +install directory is already on PATH. +""" + +import os +import re +import subprocess +from collections import namedtuple +from typing import Dict, List, Optional, Sequence + +from .util import run_captured + +CommandResult = namedtuple("CommandResult", ["rc", "stdout", "stderr"]) + +# Programs are expected to keep --help output lines within this width. Matches +# PostgreSQL::Test::Utils::program_help_ok. +_MAX_HELP_LINE_LENGTH = 95 + + +def _argv(cmd: Sequence) -> List: + """Build an argv list. If any element is bytes (e.g. 
a non-UTF8 database + name), encode the rest to bytes too so the argv is homogeneous.""" + raw = list(cmd) + if any(isinstance(c, (bytes, bytearray)) for c in raw): + return [ + bytes(c) if isinstance(c, (bytes, bytearray)) else os.fsencode(str(c)) + for c in raw + ] + return [str(c) for c in raw] + + +def _describe(cmd: Sequence, result: CommandResult) -> str: + argv = " ".join( + c.decode("utf-8", "replace") if isinstance(c, (bytes, bytearray)) else str(c) + for c in _argv(cmd) + ) + return ( + f"command: {argv}\n" + f"exit code: {result.rc}\n" + f"stdout:\n{result.stdout}\n" + f"stderr:\n{result.stderr}" + ) + + +def _assert_msg(msg: Optional[str], what: str, cmd: Sequence, result: CommandResult): + prefix = f"{msg}: " if msg else "" + return prefix + what + "\n" + _describe(cmd, result) + + +class PgBin: + """ + Runs PostgreSQL client programs and asserts on exit code and output. + + Args: + bindir: PostgreSQL bin directory; prepended to PATH for each run. + extra_env: Extra environment variables (e.g. PGHOST/PGPORT for a node). + """ + + def __init__(self, bindir, extra_env: Optional[Dict[str, str]] = None): + self._bindir = bindir + self._extra_env = dict(extra_env) if extra_env else {} + + def _env(self, extra_env: Optional[Dict[str, str]]) -> Dict[str, str]: + env = dict(os.environ) + env["PATH"] = str(self._bindir) + os.pathsep + env.get("PATH", "") + env.update(self._extra_env) + if extra_env: + env.update(extra_env) + return env + + def result(self, cmd: Sequence, *, extra_env=None) -> CommandResult: + """Run cmd, capturing output. Never raises on a nonzero exit. + + Output is captured through temporary files rather than subprocess pipes + (see util.run_captured): a program that starts a server -- e.g. + pg_basebackup or pg_ctl start -- leaves a postmaster holding the pipe's + write end open, which would deadlock a pipe read to EOF. 
+ + Co-authored-by: Andrew Dunstan + """ + returncode, stdout, stderr = run_captured(_argv(cmd), env=self._env(extra_env)) + return CommandResult(returncode, stdout, stderr) + + def popen(self, cmd: Sequence, *, extra_env=None) -> subprocess.Popen: + """Start cmd as a long-lived background process (PATH set to bindir). + + The caller is responsible for terminating/waiting on it (e.g. via + send_signal + wait). stdout/stderr are discarded. + """ + return subprocess.Popen( # pylint: disable=consider-using-with + _argv(cmd), + stdout=subprocess.DEVNULL, + stderr=subprocess.DEVNULL, + env=self._env(extra_env), + ) + + def run_redirect_stderr(self, cmd, stderr_path) -> int: + """Run cmd, appending its stderr to stderr_path; return the exit code. + + Used by pg_rewind's growing-file test, where stderr is redirected into + the very file being copied so that file grows during the copy. + """ + with open(stderr_path, "a", encoding="utf-8") as fh: + return subprocess.run( + _argv(cmd), + stdout=subprocess.DEVNULL, + stderr=fh, + env=self._env(None), + check=False, + ).returncode + + def run_command(self, cmd: Sequence, *, extra_env=None) -> CommandResult: + """Run cmd capturing chomped output, mirroring Utils::run_command. + + Both stdout and stderr have a single trailing newline removed, like the + Perl helper, so equality checks against the captured strings match. 
+ """ + result = self.result(cmd, extra_env=extra_env) + return CommandResult( + result.rc, + result.stdout.removesuffix("\n"), + result.stderr.removesuffix("\n"), + ) + + def command_ok(self, cmd, msg=None, *, extra_env=None) -> CommandResult: + """Assert the command exits with code 0.""" + result = self.result(cmd, extra_env=extra_env) + assert result.rc == 0, _assert_msg(msg, "expected success", cmd, result) + return result + + def command_fails(self, cmd, msg=None, *, extra_env=None) -> CommandResult: + """Assert the command exits with a nonzero code.""" + result = self.result(cmd, extra_env=extra_env) + assert result.rc != 0, _assert_msg(msg, "expected failure", cmd, result) + return result + + def command_exit_is(self, cmd, code, msg=None, *, extra_env=None) -> CommandResult: + """Assert the command exits with the given code.""" + result = self.result(cmd, extra_env=extra_env) + assert result.rc == code, _assert_msg(msg, f"expected exit {code}", cmd, result) + return result + + def command_like(self, cmd, pattern, msg=None, *, extra_env=None) -> CommandResult: + """Assert success and that stdout matches pattern.""" + result = self.result(cmd, extra_env=extra_env) + assert result.rc == 0, _assert_msg(msg, "expected success", cmd, result) + assert re.search(pattern, result.stdout), _assert_msg( + msg, f"stdout did not match {pattern!r}", cmd, result + ) + return result + + def command_fails_like(self, cmd, pattern, msg=None, *, extra_env=None): + """Assert failure and that stderr matches pattern.""" + result = self.result(cmd, extra_env=extra_env) + assert result.rc != 0, _assert_msg(msg, "expected failure", cmd, result) + assert re.search(pattern, result.stderr), _assert_msg( + msg, f"stderr did not match {pattern!r}", cmd, result + ) + return result + + def command_ok_or_fails_like( + self, cmd, expected_stdout, expected_stderr, msg=None, *, extra_env=None + ): + """Run cmd; if it fails, assert its stdout/stderr match the patterns. 
+ + Mirrors PostgreSQL::Test::Utils::command_ok_or_fails_like: a successful + run is accepted with no output checks (returns True); a failed run must + have stdout matching expected_stdout and stderr matching + expected_stderr (returns False). Used where a command may legitimately + be unsupported on the platform (e.g. pg_upgrade --clone). + """ + result = self.result(cmd, extra_env=extra_env) + if result.rc != 0: + assert re.search(expected_stdout, result.stdout), _assert_msg( + msg, f"stdout did not match {expected_stdout!r}", cmd, result + ) + assert re.search(expected_stderr, result.stderr), _assert_msg( + msg, f"stderr did not match {expected_stderr!r}", cmd, result + ) + return False + return True + + def command_checks_all(self, cmd, exit_code, stdout_res, stderr_res, msg=None): + """Assert the exit code and that every stdout/stderr regex matches.""" + result = self.result(cmd) + assert result.rc == exit_code, _assert_msg( + msg, f"expected exit {exit_code}", cmd, result + ) + for pattern in stdout_res: + assert re.search(pattern, result.stdout), _assert_msg( + msg, f"stdout did not match {pattern!r}", cmd, result + ) + for pattern in stderr_res: + assert re.search(pattern, result.stderr), _assert_msg( + msg, f"stderr did not match {pattern!r}", cmd, result + ) + return result + + def program_help_ok(self, name): + """--help exits 0, writes stdout, nothing to stderr, lines <= 95 chars.""" + cmd = [name, "--help"] + result = self.result(cmd) + assert result.rc == 0, _describe(cmd, result) + assert result.stdout != "", f"{name} --help produced no stdout" + assert result.stderr == "", f"{name} --help wrote to stderr:\n{result.stderr}" + long_lines = [ + ln for ln in result.stdout.splitlines() if len(ln) > _MAX_HELP_LINE_LENGTH + ] + assert not long_lines, "help lines exceed length limit:\n" + "\n".join( + long_lines + ) + return result + + def program_version_ok(self, name): + """--version exits 0, writes stdout, nothing to stderr.""" + cmd = [name, 
"--version"] + result = self.result(cmd) + assert result.rc == 0, _describe(cmd, result) + assert result.stdout != "", f"{name} --version produced no stdout" + assert result.stderr == "", f"{name} --version wrote stderr:\n{result.stderr}" + return result + + def program_options_handling_ok(self, name): + """An invalid option gives a nonzero exit and an error message.""" + cmd = [name, "--not-a-valid-option"] + result = self.result(cmd) + assert result.rc != 0, f"{name} accepted an invalid option" + assert result.stderr != "", f"{name} printed no error for an invalid option" + return result + + def check_pg_config(self, regexp): + """Return True if a line in the installed pg_config.h matches regexp. + + Mirrors PostgreSQL::Test::Utils::check_pg_config (the pattern is + anchored at the start of the line). + """ + includedir = self.result(["pg_config", "--includedir"]).stdout.strip() + header = os.path.join(includedir, "pg_config.h") + with open(header, encoding="utf-8", errors="replace") as f: + return any(re.match(regexp, line) for line in f) diff --git a/src/test/pytest/pypg/fake.py b/src/test/pytest/pypg/fake.py new file mode 100644 index 0000000000000..e44aa12d1f41a --- /dev/null +++ b/src/test/pytest/pypg/fake.py @@ -0,0 +1,103 @@ +# Copyright (c) 2026, PostgreSQL Global Development Group + +"""Random and (optionally) realistic fake test-data generation. + +``rand_str`` mirrors the ``randStr`` helper used across PostgreSQL TAP tests: a +uniform random string over ``[A-Za-z0-9]`` of a given length. For +realistic-looking ("meaningful") fake data -- names, emails, addresses, +sentences -- this module optionally uses the third-party ``faker`` library. +Faker is not a hard dependency: when it is not installed, :func:`faker` returns +``None`` after a single :class:`RuntimeWarning`, and :func:`meaningful_text` +transparently falls back to :func:`rand_str`. Install it with +``uv sync --extra fake`` (or ``pip install faker``). 
+""" + +import random +import string +import warnings + +# [A-Z][a-z][0-9] -- exactly the character set of the Perl TAP randStr helper: +# my @chars = ("A" .. "Z", "a" .. "z", "0" .. "9"); +DEFAULT_CHARSET = string.ascii_uppercase + string.ascii_lowercase + string.digits + +_warned = set() + + +def rand_str(length, charset=DEFAULT_CHARSET): + """Return a random string of ``length`` characters drawn uniformly from + ``charset`` (default ``[A-Za-z0-9]``). + + Equivalent to the Perl TAP ``randStr`` subroutine:: + + sub randStr { + my $len = shift; + my @chars = ("A" .. "Z", "a" .. "z", "0" .. "9"); + return join '', map { $chars[ rand @chars ] } 1 .. $len; + } + + Each character is chosen independently and uniformly, matching Perl's + ``$chars[rand @chars]``. + + Args: + length: Number of characters to generate (must be non-negative). + charset: Characters to draw from (must be non-empty). + + Returns: + A freshly generated random string of the requested length. + """ + if length < 0: + raise ValueError("length must be non-negative") + if not charset: + raise ValueError("charset must be non-empty") + return "".join(random.choice(charset) for _ in range(length)) + + +def faker(locale=None, seed=None): + """Return a ``Faker`` instance for realistic fake data, or ``None``. + + If the optional ``faker`` package is not installed, returns ``None`` after + emitting a single :class:`RuntimeWarning` (subsequent calls are silent). + Install it with ``uv sync --extra fake`` (or ``pip install faker``). + + Args: + locale: Optional Faker locale (e.g. ``"de_DE"``), passed through to + ``Faker(locale)``. + seed: If given, seed Faker for reproducible output (``Faker.seed``). + + Returns: + A ``Faker`` instance, or ``None`` when Faker is unavailable. + """ + try: + # faker is an optional dependency (the `fake` extra); ignore if absent. 
+ import faker # pylint: disable=import-outside-toplevel # pyrefly: ignore + except ImportError: + if "faker" not in _warned: + _warned.add("faker") + warnings.warn( + "Faker is not installed; meaningful fake-data generation is " + "unavailable (falling back to random strings). Install it with " + "`uv sync --extra fake` or `pip install faker`.", + RuntimeWarning, + stacklevel=2, + ) + return None + if seed is not None: + faker.Faker.seed(seed) + return faker.Faker(locale) + + +def meaningful_text(max_chars=200, locale=None): + """Return realistic-looking text via Faker, or a random fallback. + + Uses ``Faker.text`` when Faker is installed; otherwise falls back to + :func:`rand_str` (after :func:`faker` issues its one-time warning) so + callers always get a usable string. + + Args: + max_chars: Approximate maximum length of the generated text. + locale: Optional Faker locale. + """ + fake = faker(locale=locale) + if fake is None: + return rand_str(max_chars) + return fake.text(max_nb_chars=max_chars) diff --git a/src/test/pytest/pypg/fixtures.py b/src/test/pytest/pypg/fixtures.py index 39d72a3cb3866..fea8ae50983af 100644 --- a/src/test/pytest/pypg/fixtures.py +++ b/src/test/pytest/pypg/fixtures.py @@ -12,14 +12,38 @@ from ._env import test_timeout_default from .util import capture from .server import PostgresServer +from .rewind import RewindTest +from .command import PgBin -from libpq import load_libpq_handle, connect as libpq_connect +from libpq import ( + load_libpq_handle, + libpq_abi_skip_reason, + connect as libpq_connect, +) # Stash key for tracking servers for log reporting. _servers_key = pytest.StashKey[List[PostgresServer]]() +@pytest.fixture(scope="session", autouse=True) +def _check_libpq_abi(libdir, bindir): + """Skip the suite when this Python cannot load the build's libpq. + + The in-process libpq layer is loaded via ctypes, so the interpreter must + match libpq's ABI. 
A 64-bit Python cannot dlopen the 32-bit libpq from a + -m32 build, which would otherwise fail every test; skip with a clear reason + instead. (The 64-bit ASan build is handled separately by preloading the + ASan runtime in CI -- see the Test world step in pg-ci.yml.) See + libpq.libpq_abi_skip_reason. + + Co-authored-by: Andrew Dunstan + """ + reason = libpq_abi_skip_reason(libdir, bindir) + if reason: + pytest.skip(reason) + + def _record_server_for_log_reporting(request, server): """Record a server for log reporting on test failure.""" if _servers_key not in request.node.stash: @@ -74,6 +98,22 @@ def libpq_handle(libdir, bindir): raise +@pytest.fixture(scope="session") +def pg_bin(bindir): + """ + A PgBin for running PostgreSQL client programs and asserting on their + results, with bindir on PATH. Use for program-level checks that don't need + a specific server connection (e.g. pg_bin.program_help_ok("pg_ctl")). + """ + return PgBin(bindir) + + +@pytest.fixture +def rewind_test(create_pg, pg_bin, tmp_path): + """A RewindTest driver bound to this test's create_pg/pg_bin/tmp_path.""" + return RewindTest(create_pg, pg_bin, tmp_path) + + @pytest.fixture def connect(libpq_handle, remaining_timeout): """ @@ -129,11 +169,8 @@ def tmp_check(tmp_path_factory) -> pathlib.Path: """ d = os.getenv("TESTDATADIR") if d: - d = pathlib.Path(d) - else: - d = tmp_path_factory.mktemp("tmp_check") - - return d + return pathlib.Path(d) + return tmp_path_factory.mktemp("tmp_check") @pytest.fixture(scope="session") @@ -244,7 +281,7 @@ def test_multiple_servers(create_pg): """ servers = [] - def _create(name=None, **kwargs): + def _create(name=None, *, start=True, **kwargs): if name is None: count = len(servers) + 1 name = f"pg{count}" @@ -254,7 +291,8 @@ def _create(name=None, **kwargs): servers.append(server) _record_server_for_log_reporting(request, server) server.set_timeout(remaining_timeout) - server.start() + if start: + server.start() return server yield _create @@ -295,7 +333,7 
class InteractivePsql:
    """An interactive (PTY) psql session driven by sending input + matching output.

    Mirrors the interactive form of PostgreSQL::Test::BackgroundPsql: send raw
    bytes (including tab/control characters) and wait until the accumulated
    terminal output matches a regex.

    Attributes:
        timed_out: True when the most recent query_until() call returned
            without a match (deadline passed, or the terminal reached EOF).
    """

    def __init__(self, cmd, env, timeout=None):
        """Spawn cmd on a fresh pseudo-terminal.

        Args:
            cmd: Argument list of the program to run (normally psql).
            env: Environment mapping handed to the child process.
            timeout: Seconds query_until()/quit() may wait; defaults to
                test_timeout_default().
        """
        # Local import: only needed to build the stream decoder below.
        import codecs  # pylint: disable=import-outside-toplevel

        self._cmd = cmd
        self._env = env
        self._timeout = timeout if timeout is not None else test_timeout_default()
        self._buf = ""
        self._eof = False
        self.timed_out = False
        # Incremental decoding keeps a multi-byte character that straddles two
        # reads intact instead of turning both halves into U+FFFD.
        self._decoder = codecs.getincrementaldecoder("utf-8")(errors="replace")
        self._master, slave = pty.openpty()
        try:
            self._proc = subprocess.Popen(  # pylint: disable=consider-using-with
                cmd,
                stdin=slave,
                stdout=slave,
                stderr=slave,
                env=env,
                close_fds=True,
            )
        except BaseException:
            # Do not leak the master side when the program cannot be started.
            os.close(self._master)
            self._master = -1
            raise
        finally:
            # The child holds its own copy of the slave side (if it started).
            os.close(slave)

    def set_winsize(self, rows, cols):
        """Set the terminal window size (rows x cols) for pagination tests."""
        fcntl.ioctl(
            self._master, termios.TIOCSWINSZ, struct.pack("HHHH", rows, cols, 0, 0)
        )

    def set_query_timer_restart(self):
        """No-op timer reset (kept for parity with BackgroundPsql)."""

    def _write(self, text):
        """Write text to the terminal, retrying after short writes."""
        pending = memoryview(text.encode("utf-8"))
        while pending:
            written = os.write(self._master, pending)
            pending = pending[written:]

    def _take_buffer(self):
        """Return the accumulated output and reset the buffer."""
        out, self._buf = self._buf, ""
        return out

    def _drain(self, deadline):
        """Append at most one chunk of terminal output to the buffer.

        Polls in 50ms slices until data arrives, the deadline passes, or the
        terminal reports end-of-file (an empty read or an OSError from read).
        """
        while not self._eof and time.monotonic() < deadline:
            ready, _, _ = select.select([self._master], [], [], 0.05)
            if not ready:
                continue
            try:
                chunk = os.read(self._master, 4096)
            except OSError:
                chunk = b""
            if chunk:
                self._buf += self._decoder.decode(chunk)
            else:
                # Nothing more can ever arrive; flush any dangling partial
                # character and remember EOF so callers stop polling.
                self._eof = True
                self._buf += self._decoder.decode(b"", final=True)
            return

    def query_until(self, pattern, send):
        """Send input, then read until the accumulated output matches pattern.

        Args:
            pattern: A regex string or an object with a search() method.
            send: Text to write first; nothing is written when it is empty.

        Returns:
            Everything accumulated so far (the buffer is then cleared). On
            timeout, or once the terminal has reached EOF without a match,
            timed_out is set and whatever was seen is returned. Returning at
            EOF avoids spinning until the deadline for output that cannot
            arrive; the outcome is otherwise the same as a timeout.
        """
        regex = pattern if hasattr(pattern, "search") else re.compile(pattern)
        if send:
            self._write(send)
        deadline = time.monotonic() + self._timeout
        self.timed_out = False
        while True:
            if regex.search(self._buf):
                return self._take_buffer()
            if self._eof or time.monotonic() > deadline:
                self.timed_out = True
                return self._take_buffer()
            self._drain(deadline)

    def send(self, data):
        """Write raw data to the terminal without waiting for output."""
        self._write(data)

    def quit(self):
        """Close the session (send \\q, then wait), returning the exit code.

        The process is killed if it does not exit within the timeout. Calling
        quit() again is harmless and returns the same exit code.
        """
        if self._master < 0:
            return self._proc.returncode
        try:
            try:
                self._write("\\q\n")
            except OSError:
                # The child may already be gone; still reap it below.
                pass
            try:
                self._proc.wait(timeout=self._timeout)
            except subprocess.TimeoutExpired:
                self._proc.kill()
                self._proc.wait()
        finally:
            os.close(self._master)
            # Never reuse the closed descriptor number by accident.
            self._master = -1
            self._eof = True
        return self._proc.returncode
# Oldest MIT krb5 release that understands kdc_listen/kdc_tcp_listen.
_KRB5_LISTEN_MIN_RELEASE = (1, 15)


def _eprint(*args) -> None:
    """Print a diagnostic line to stderr (mirrors note()/diag() in Perl)."""
    print(*args, file=sys.stderr)


def _which(name: str) -> str:
    """Resolve a krb5 binary from PATH, raising if it cannot be found.

    The harness relies on the krb5 bin/sbin directories being on PATH (the
    Perl module hard-codes platform-specific directories; we resolve via PATH
    so the same code works for the nix store path and system installs).

    Raises:
        FileNotFoundError: name is not on PATH.
    """
    found = shutil.which(name)
    if found is None:
        raise FileNotFoundError(
            "could not find krb5 binary {!r} on PATH; ensure the krb5 "
            "bin/sbin directories are on PATH".format(name)
        )
    return found


def _parse_krb5_release(text: str, prefix: str = "release ") -> Optional[tuple]:
    """Extract (major, minor) from the first ``<prefix>X.Y`` in text, or None.

    Returning integers lets callers order releases correctly; as floats,
    release 1.9 would compare greater than 1.15.
    """
    match = re.search(re.escape(prefix) + r"([0-9]+)\.([0-9]+)", text)
    if match is None:
        return None
    return int(match.group(1)), int(match.group(2))


def _detect_krb5_release() -> tuple:
    """Return the MIT krb5 release as a ``(major, minor)`` tuple.

    Mirrors Kerberos.pm: run ``krb5-config --version`` and parse the
    ``Kerberos 5 release X.Y`` line, bailing on Heimdal. If ``krb5-config`` is
    not installed (it ships only with the krb5 dev package) we fall back to
    ``krb5kdc``'s usage banner, and finally to a conservative 1.15 so the
    newer ``kdc_listen``/``kdc_tcp_listen`` settings are used.

    Raises:
        RuntimeError: krb5-config reports a Heimdal installation.
    """
    krb5_config = shutil.which("krb5-config")
    if krb5_config is not None:
        proc = subprocess.run(
            [krb5_config, "--version"],
            stdout=subprocess.PIPE,
            stderr=subprocess.STDOUT,
            encoding="utf-8",
            check=False,
        )
        out = proc.stdout
        if "heimdal" in out.lower():
            raise RuntimeError("Heimdal is not supported")
        release = _parse_krb5_release(out, "Kerberos 5 release ")
        if release is not None:
            return release

    # krb5-config absent: probe krb5kdc's own version/usage text.
    krb5kdc = shutil.which("krb5kdc")
    if krb5kdc is not None:
        try:
            proc = subprocess.run(
                [krb5kdc, "-r", "VERSION_PROBE", "-n"],
                stdout=subprocess.PIPE,
                stderr=subprocess.STDOUT,
                encoding="utf-8",
                check=False,
                timeout=5,
            )
        except subprocess.TimeoutExpired:
            # -n keeps krb5kdc in the foreground; if it does not exit on its
            # own, the probe told us nothing, so use the default below.
            proc = None
        if proc is not None:
            release = _parse_krb5_release(proc.stdout)
            if release is not None:
                return release

    # Conservative default: the _listen settings exist since krb5 1.15, which
    # has been the norm for many years.
    return _KRB5_LISTEN_MIN_RELEASE


def _detect_krb5_version() -> float:
    """Return the MIT krb5 release as a float (e.g. 1.21 -> 1.21).

    Kept for callers that want the float form. Do not order releases with it
    (1.9 > 1.15 numerically); use _detect_krb5_release() for comparisons.
    """
    major, minor = _detect_krb5_release()
    return float("{}.{}".format(major, minor))


class KerberosServer:
    """A running stand-alone KDC and its generated configuration.

    Create with :func:`KerberosServer.setup`; the realm is configured, the
    service principal added and ``krb5kdc`` started. Call
    :meth:`create_principal` to add user principals, :meth:`create_ticket` to
    run ``kinit`` for one of them, and :meth:`stop` (or use as a context
    manager) to shut the KDC down.

    Attributes:
        keytab: Path to the service principal keytab (krb_server_keyfile).
        krb5_conf: Path to the generated krb5.conf.
        kdc_conf: Path to the generated kdc.conf.
        krb5_cache: Path to the credentials cache (KRB5CCNAME).
        realm: The Kerberos realm name.
        kdc_port: The TCP/UDP port the KDC listens on.
    """

    def __init__(
        self,
        tmp_check: pathlib.Path,
        log_path: pathlib.Path,
        host: str,
        hostaddr: str,
        realm: str,
    ):
        """Initialize paths and binary locations; does not start anything.

        Args:
            tmp_check: Directory for generated config, cache and KDC database.
            log_path: Directory for the krb5 library and KDC log files.
            host: Hostname used in the PostgreSQL service principal.
            hostaddr: Interface address the KDC listens on.
            realm: Kerberos realm name.

        Raises:
            FileNotFoundError: a required krb5 binary is not on PATH.
        """
        self.host = host
        self.hostaddr = hostaddr
        self.realm = realm

        self._krb5_config = shutil.which("krb5-config") or "krb5-config"
        self._kinit = _which("kinit")
        self._klist = _which("klist")
        self._kdb5_util = _which("kdb5_util")
        self._kadmin_local = _which("kadmin.local")
        self._krb5kdc = _which("krb5kdc")

        self.krb5_conf = tmp_check / "krb5.conf"
        self.kdc_conf = tmp_check / "kdc.conf"
        self.krb5_cache = tmp_check / "krb5cc"
        self._krb5_log = log_path / "krb5libs.log"
        self._kdc_log = log_path / "krb5kdc.log"
        self.kdc_port = get_free_port()
        self._kdc_datadir = tmp_check / "krb5kdc"
        self._kdc_pidfile = tmp_check / "krb5kdc.pid"
        self.keytab = tmp_check / "krb5.keytab"
        self._stopped = False
        # Values the KRB5* variables had before setup() replaced them
        # (None = variable was unset); stop() puts them back.
        self._saved_env = {}

    @classmethod
    def setup(
        cls,
        tmp_check: pathlib.Path,
        log_path: pathlib.Path,
        host: str,
        hostaddr: str,
        realm: str,
    ) -> "KerberosServer":
        """Build the realm, add the service principal and start krb5kdc.

        Mirrors ``PostgreSQL::Test::Kerberos->new``: assigns a free port for the
        KDC, writes the config, creates the KDC database with a master key,
        adds the ``$with_krb_srvnam/$host`` service principal, extracts its
        keytab and launches ``krb5kdc``. The required ``KRB5_CONFIG``,
        ``KRB5_KDC_PROFILE`` and ``KRB5CCNAME`` environment variables are set as
        a side effect so all child processes use this test realm; :meth:`stop`
        restores their previous values. If any step fails the environment is
        restored before the error propagates.

        Raises:
            RuntimeError: a krb5 command exited non-zero.
        """
        self = cls(tmp_check, log_path, host, hostaddr, realm)
        self._write_config()
        self._kdc_datadir.mkdir()

        # Ensure we use the test's config and cache files, not global ones.
        for var, path in (
            ("KRB5_CONFIG", self.krb5_conf),
            ("KRB5_KDC_PROFILE", self.kdc_conf),
            ("KRB5CCNAME", self.krb5_cache),
        ):
            self._saved_env.setdefault(var, os.environ.get(var))
            os.environ[var] = str(path)

        krb_srvnam = os.environ.get("with_krb_srvnam", "postgres")
        service_principal = "{}/{}".format(krb_srvnam, host)

        started = False
        try:
            self._run_or_bail([self._kdb5_util, "create", "-s", "-P", "secret0"])
            self._run_or_bail(
                [self._kadmin_local, "-q", "addprinc -randkey " + service_principal]
            )
            self._run_or_bail(
                [
                    self._kadmin_local,
                    "-q",
                    "ktadd -k {} {}".format(self.keytab, service_principal),
                ]
            )
            self._run_or_bail([self._krb5kdc, "-P", str(self._kdc_pidfile)])
            started = True
        finally:
            if not started:
                # No object is returned to the caller on failure, so undo the
                # process-wide side effects here.
                self.stop()
        return self

    def _write_config(self) -> None:
        """Write krb5.conf and kdc.conf for this realm (mirrors Kerberos.pm).

        DNS realm/KDC lookups and reverse DNS are explicitly disabled, the
        non-standard KDC port is pinned, and for krb5 >= 1.15 the bind is
        restricted to the test interface via kdc_listen/kdc_tcp_listen.

        dns_canonicalize_hostname is disabled so the GSSAPI client uses the
        literal service hostname for the SPN instead of doing a forward DNS
        lookup. The test's service host (auth-test-localhost...example.com) does
        not resolve, so without this each GSS connection blocks ~20s on a
        resolver timeout; upstream CI sidesteps this by putting the name in
        /etc/hosts, which is not writable here. The keytab principal already
        uses the literal name, so authentication is unchanged.
        """
        append_to_file(
            self.krb5_conf,
            "[logging]\n"
            "default = FILE:{krb5_log}\n"
            "kdc = FILE:{kdc_log}\n"
            "\n"
            "[libdefaults]\n"
            "dns_lookup_realm = false\n"
            "dns_lookup_kdc = false\n"
            "dns_canonicalize_hostname = false\n"
            "default_realm = {realm}\n"
            "forwardable = false\n"
            "rdns = false\n"
            "\n"
            "[realms]\n"
            "{realm} = {{\n"
            "  kdc = {hostaddr}:{kdc_port}\n"
            "}}\n".format(
                krb5_log=self._krb5_log,
                kdc_log=self._kdc_log,
                realm=self.realm,
                hostaddr=self.hostaddr,
                kdc_port=self.kdc_port,
            ),
        )

        append_to_file(self.kdc_conf, "[kdcdefaults]\n")

        # Compare (major, minor) tuples, not floats: 1.9 must sort below 1.15.
        if _detect_krb5_release() >= _KRB5_LISTEN_MIN_RELEASE:
            append_to_file(
                self.kdc_conf,
                "kdc_listen = {hostaddr}:{kdc_port}\n"
                "kdc_tcp_listen = {hostaddr}:{kdc_port}\n".format(
                    hostaddr=self.hostaddr, kdc_port=self.kdc_port
                ),
            )
        else:
            append_to_file(
                self.kdc_conf,
                "kdc_ports = {kdc_port}\n"
                "kdc_tcp_ports = {kdc_port}\n".format(kdc_port=self.kdc_port),
            )

        append_to_file(
            self.kdc_conf,
            "\n"
            "[realms]\n"
            "{realm} = {{\n"
            "  database_name = {datadir}/principal\n"
            "  admin_keytab = FILE:{datadir}/kadm5.keytab\n"
            "  acl_file = {datadir}/kadm5.acl\n"
            "  key_stash_file = {datadir}/_k5.{realm}\n"
            "}}".format(realm=self.realm, datadir=self._kdc_datadir),
        )

    def create_principal(self, principal: str, password: str) -> None:
        """Add a user principal with a fixed password (mirrors create_principal)."""
        self._run_or_bail(
            [
                self._kadmin_local,
                "-q",
                "addprinc -pw {} {}".format(password, principal),
            ]
        )

    def create_ticket(
        self, principal: str, password: str, *, forwardable: bool = False
    ) -> None:
        """Obtain a TGT for principal via kinit (mirrors create_ticket).

        With ``forwardable=True`` the ``-f`` flag is passed so the ticket can be
        delegated. The password is supplied on kinit's stdin; ``klist -f`` is
        then run for diagnostics, exactly as the Perl module does.
        """
        cmd = [self._kinit, principal]
        if forwardable:
            cmd.append("-f")
        self._run_or_bail(cmd, stdin=password)
        self._run_or_bail([self._klist, "-f"])

    def _run_or_bail(self, cmd: List[str], stdin: Optional[str] = None) -> None:
        """Run a krb5 command, echoing it and raising with output on failure.

        Mirrors system_or_bail/run_log: the command is logged to stderr, and on
        a non-zero exit a RuntimeError carrying stdout+stderr is raised so the
        KDC's complaint (realm casing, FQDN, port binding, ...) is visible.

        Args:
            cmd: Program and arguments; each element is converted with str().
            stdin: Optional text fed to the command's standard input.

        Raises:
            RuntimeError: the command exited with a non-zero status.
        """
        argv = [str(c) for c in cmd]
        _eprint("+ " + " ".join(argv))
        proc = subprocess.run(
            argv,
            input=stdin,
            stdout=subprocess.PIPE,
            stderr=subprocess.STDOUT,
            encoding="utf-8",
            # Diagnostics only: never fail on undecodable tool output.
            errors="replace",
            check=False,
        )
        if proc.stdout:
            _eprint(proc.stdout)
        if proc.returncode != 0:
            raise RuntimeError(
                "command failed (exit {}): {}\noutput:\n{}".format(
                    proc.returncode, " ".join(argv), proc.stdout
                )
            )

    def _restore_env(self) -> None:
        """Put back the KRB5* variables that setup() overrode."""
        for var, previous in self._saved_env.items():
            if previous is None:
                os.environ.pop(var, None)
            else:
                os.environ[var] = previous
        self._saved_env.clear()

    def stop(self) -> None:
        """Stop the KDC by signalling the pid in its pidfile (mirrors END).

        Sends SIGINT to the daemonized krb5kdc, taking care to be idempotent so
        it is safe to call from a fixture teardown and a context-manager exit.
        The environment variables changed by :meth:`setup` are restored so the
        test realm does not leak into whatever runs next in this process.
        """
        if self._stopped:
            return
        self._stopped = True
        self._restore_env()
        try:
            pid = int(self._kdc_pidfile.read_text(encoding="utf-8").strip())
        except (OSError, ValueError):
            return
        if pid <= 0:
            # kill(0)/kill(-n) address whole process groups, which would
            # include the test runner itself; a sane pidfile never says that.
            return
        try:
            os.kill(pid, signal.SIGINT)
        except ProcessLookupError:
            pass

    def __enter__(self) -> "KerberosServer":
        return self

    def __exit__(self, *exc) -> None:
        self.stop()
# SQL run on the primary to create rewind_user and grant it only the file
# access functions named here, which is all pg_rewind is given in these tests.
GRANT_REWIND_USER = """
CREATE ROLE rewind_user LOGIN;
GRANT EXECUTE ON function pg_catalog.pg_ls_dir(text, boolean, boolean)
  TO rewind_user;
GRANT EXECUTE ON function pg_catalog.pg_stat_file(text, boolean)
  TO rewind_user;
GRANT EXECUTE ON function pg_catalog.pg_read_binary_file(text)
  TO rewind_user;
GRANT EXECUTE ON function pg_catalog.pg_read_binary_file(text, bigint, bigint, boolean)
  TO rewind_user;"""


class RewindTest:
    """Stateful driver for a pg_rewind primary/standby scenario.

    Typical order of calls: setup_cluster(), start_primary(),
    create_standby(), promote_standby(), run_pg_rewind(mode) and finally
    clean_rewind_test().

    Attributes:
        primary: The primary server, or None before setup_cluster().
        standby: The standby server, or None before create_standby().
    """

    # Source modes understood by run_pg_rewind().
    _MODES = ("local", "remote", "archive")

    def __init__(self, create_pg, pg_bin, tmp_path):
        """Remember the factories/paths; no servers are created yet.

        Args:
            create_pg: Callable creating a server (the create_pg fixture).
            pg_bin: Object whose command_ok() runs a client program.
            tmp_path: Directory for the temporary postgresql.conf copy.
        """
        self._create_pg = create_pg
        self._pg_bin = pg_bin
        self._tmp_path = tmp_path
        self.primary = None
        self.standby = None
        self._group_access = False

    @staticmethod
    def _wants_group_access(extra):
        """Return True if the initdb options in extra enable group access.

        Both spellings of the option are recognized: -g and
        --allow-group-access.
        """
        return any(opt in ("-g", "--allow-group-access") for opt in (extra or ()))

    def setup_cluster(self, extra_name=None, extra=None):
        """Initialize the primary (checksums, streaming, rewind_user auth).

        Args:
            extra_name: Optional suffix appended to the node name.
            extra: Optional list of extra initdb options.
        """
        name = "primary" + ("_" + extra_name if extra_name else "")
        # Remembered so run_pg_rewind() restores postgresql.conf with the
        # permissions matching the cluster's group-access setting.
        self._group_access = self._wants_group_access(extra)
        self.primary = self._create_pg(
            name,
            allows_streaming=True,
            auth_extra=["--create-role", "rewind_user"],
            extra=extra,
            start=False,
        )
        self.primary.append_conf(
            "wal_keep_size = 320MB\nallow_in_place_tablespaces = on\n"
        )

    def start_primary(self):
        """Start the primary and create the minimal-privilege rewind_user."""
        assert self.primary is not None
        self.primary.start()
        self.primary.safe_psql(GRANT_REWIND_USER)

    def create_standby(self, extra_name=None):
        """Back up the primary and bring up a streaming standby from it."""
        assert self.primary is not None
        name = "standby" + ("_" + extra_name if extra_name else "")
        self.primary.backup("my_backup")
        self.standby = self._create_pg(
            name, from_backup=(self.primary, "my_backup"), start=False
        )
        self.standby.append_conf(
            "primary_conninfo='{}'\n".format(self.primary.connstr())
        )
        self.standby.set_standby_mode()
        self.standby.start()

    def promote_standby(self):
        """Wait for the standby to catch up, then promote it (primary diverges)."""
        assert self.primary is not None and self.standby is not None
        self.primary.wait_for_catchup(self.standby, "write")
        self.standby.promote()

    def run_pg_rewind(self, test_mode):
        """Rewind the old primary from the standby in 'local', 'remote' or
        'archive' mode, then restart it.

        Raises:
            ValueError: test_mode is not one of the supported modes. This is
                checked before any server is touched.
        """
        # Reject a bad mode up front, before the primary is stopped and its
        # configuration copied, so a typo cannot leave the cluster half-done.
        if test_mode not in self._MODES:
            raise ValueError("unsupported pg_rewind test mode: {}".format(test_mode))
        assert self.primary is not None and self.standby is not None
        primary_pgdata = self.primary.datadir
        standby_pgdata = self.standby.datadir
        standby_connstr = self.standby.connstr("postgres") + " user=rewind_user"
        conf_tmp = self._tmp_path / "primary-postgresql.conf.tmp"

        if test_mode == "archive":
            # WAL files are moved to the archive; stop gracefully so a clean
            # restart is still possible (--no-ensure-shutdown is used below).
            self.primary.stop()
        else:
            # The primary must finish recovery once; pg_rewind ensures that.
            self.primary.stop("immediate")

        # Keep a copy of postgresql.conf; pg_rewind overwrites it.
        shutil.copy(primary_pgdata / "postgresql.conf", conf_tmp)

        if test_mode == "local":
            self.standby.stop()
            self._pg_bin.command_ok(
                [
                    "pg_rewind",
                    "--debug",
                    "--source-pgdata",
                    str(standby_pgdata),
                    "--target-pgdata",
                    str(primary_pgdata),
                    "--no-sync",
                    "--config-file",
                    str(conf_tmp),
                ],
                "pg_rewind local",
            )
        elif test_mode == "remote":
            self._pg_bin.command_ok(
                [
                    "pg_rewind",
                    "--debug",
                    "--source-server",
                    standby_connstr,
                    "--target-pgdata",
                    str(primary_pgdata),
                    "--no-sync",
                    "--write-recovery-conf",
                    "--config-file",
                    str(conf_tmp),
                ],
                "pg_rewind remote",
            )
            auto = (primary_pgdata / "postgresql.auto.conf").read_text(encoding="utf-8")
            assert "dbname=postgres" in auto, "recovery conf file sets dbname"
            assert (
                primary_pgdata / "standby.signal"
            ).exists(), "standby.signal created after pg_rewind"
            self.standby.safe_psql("ALTER ROLE rewind_user WITH REPLICATION;")
        else:
            # "archive" (validated above). Source is a local pgdata; WAL is
            # supplied from the target's archive via restore_command
            # (--restore-target-wal). Move all WAL segments from the
            # (gracefully stopped) old primary to its archive.
            archive_dir = self.primary.archive_dir
            wal_dir = primary_pgdata / "pg_wal"
            if archive_dir.exists():
                shutil.rmtree(archive_dir)
            shutil.copytree(wal_dir, archive_dir)
            shutil.rmtree(wal_dir)
            wal_dir.mkdir()
            archive_dir.chmod(0o700)
            wal_dir.chmod(0o700)
            # Add restore_command to the target cluster (restore from itself).
            self.primary._enable_restoring(  # pylint: disable=protected-access
                self.primary, standby=False
            )
            self.standby.stop()
            self._pg_bin.command_ok(
                [
                    "pg_rewind",
                    "--debug",
                    "--source-pgdata",
                    str(standby_pgdata),
                    "--target-pgdata",
                    str(primary_pgdata),
                    "--no-sync",
                    "--no-ensure-shutdown",
                    "--restore-target-wal",
                    "--config-file",
                    str(primary_pgdata / "postgresql.conf"),
                ],
                "pg_rewind archive",
            )

        # Restore the saved postgresql.conf.
        shutil.move(str(conf_tmp), str(primary_pgdata / "postgresql.conf"))
        (primary_pgdata / "postgresql.conf").chmod(
            0o640 if self._group_access else 0o600
        )

        # Reconnect the rewound primary to the promoted standby (non-remote;
        # in remote mode --write-recovery-conf was passed to pg_rewind instead).
        if test_mode != "remote":
            self.primary.append_conf(
                "primary_conninfo='port={}'\n".format(self.standby.port)
            )
            self.primary.set_standby_mode()

        self.primary.start()

    def primary_psql(self, cmd, dbname="postgres"):
        """Run cmd on the primary (dies on error), like RewindTest::primary_psql."""
        assert self.primary is not None
        self.primary.safe_psql(cmd, dbname=dbname)

    def standby_psql(self, cmd, dbname="postgres"):
        """Run cmd on the standby (dies on error), like RewindTest::standby_psql."""
        assert self.standby is not None
        self.standby.safe_psql(cmd, dbname=dbname)

    def check_query(self, query, expected_stdout, test_name):
        """Assert that query against the primary returns expected_stdout."""
        assert self.primary is not None
        result = self.primary.safe_psql(query)
        assert result == expected_stdout, "{}: query result matches".format(test_name)

    def clean_rewind_test(self):
        """Stop both servers if they are still running."""
        for node in (self.primary, self.standby):
            if node is not None:
                try:
                    node.stop()
                except Exception:  # pylint: disable=broad-exception-caught
                    # Best effort: a node that is already down is fine here.
                    pass
index c0e308b01357b..72a391fd87924 100644 --- a/src/test/pytest/pypg/server.py +++ b/src/test/pytest/pypg/server.py @@ -5,14 +5,21 @@ import pathlib import platform import re +import shlex import shutil import socket +import stat import subprocess import tempfile +import time from collections import namedtuple -from typing import Callable, Optional +from typing import Callable, Optional, Tuple -from .util import run +from ._env import test_timeout_default +from .command import CommandResult, PgBin +from .bgpsql import BackgroundPsql +from .interactive import InteractivePsql +from .util import append_to_file, eprint, run, slurp_file from libpq import PGconn, connect as libpq_connect @@ -25,6 +32,7 @@ def __init__(self, file: pathlib.Path): super().__init__() self._file = file + self._backup: Optional[pathlib.Path] = None def __enter__(self): with tempfile.NamedTemporaryFile( @@ -39,6 +47,7 @@ def __enter__(self): def __exit__(self, *exc): # Swap the backup and the original file, so that the modified contents # can still be inspected in case of failure. + assert self._backup is not None # set by __enter__ tmp = self._backup.parent / (self._backup.name + ".tmp") shutil.copyfile(self._file, tmp) @@ -63,10 +72,10 @@ def prepend(self, *lines): a string or a list of strings. List elements will be joined by single spaces before they are written to file. """ - with open(self._file, "r") as f: + with open(self._file, "r", encoding="utf-8") as f: prior_data = f.read() - with open(self._file, "w") as f: + with open(self._file, "w", encoding="utf-8") as f: for line in lines: if isinstance(line, list): print(*line, file=f) @@ -90,7 +99,7 @@ def set(self, **gucs): Temporarily appends GUC settings to the server's postgresql.conf. 
""" - with open(self._file, "a") as f: + with open(self._file, "a", encoding="utf-8") as f: print(file=f) for n, v in gucs.items(): @@ -112,6 +121,8 @@ def set(self, **gucs): Backup = namedtuple("Backup", "conf, hba") +WINDOWS_OS = platform.system() == "Windows" + class PostgresServer: """ @@ -129,6 +140,20 @@ def __init__( *, hostaddr: Optional[str] = None, port: Optional[int] = None, + allows_streaming: bool = False, + from_backup: Optional[Tuple["PostgresServer", str]] = None, + has_streaming: bool = False, + has_restoring: bool = False, + standby: bool = True, + has_archiving: bool = False, + extra: Optional[list] = None, + combine_with_prior: Optional[list] = None, + combine_mode: Optional[str] = None, + auth_extra: Optional[list] = None, + no_data_checksums: bool = False, + force_initdb: bool = False, + tablespace_map: Optional[dict] = None, + tar_program: Optional[str] = None, ): """ Initialize a PostgreSQL server instance. Call start() to actually @@ -143,10 +168,22 @@ def __init__( hostaddr: If provided, use this specific address (e.g., "127.0.0.2") port: If provided, use this port instead of finding a free one, is currently only allowed if hostaddr is also provided + allows_streaming: Configure the server as a streaming-replication + primary (wal_level, max_wal_senders, etc.), mirroring + PostgreSQL::Test::Cluster->init(allows_streaming => 1). + from_backup: (source_server, backup_name) to initialize the data + directory from a base backup instead of running initdb, + mirroring init_from_backup(). + has_streaming: When initializing from a backup, configure this + server as a streaming standby of the backup's source server. 
""" if hostaddr is None and port is not None: raise NotImplementedError("port was provided without hostaddr") + if has_streaming and from_backup is None: + raise ValueError("has_streaming requires from_backup") + if has_restoring and from_backup is None: + raise ValueError("has_restoring requires from_backup") self.name = name self.datadir = datadir @@ -157,6 +194,9 @@ def __init__( self._pg_ctl = bindir / "pg_ctl" self.log = datadir / "postgresql.log" self._log_start_pos = 0 + self._logfile_generation = 0 + self.pid: Optional[int] = None + self._backup_root = pathlib.Path(datadir).parent / (str(name) + "_backups") # ExitStack for cleanup callbacks self._cleanup_stack = contextlib.ExitStack() @@ -164,23 +204,21 @@ def __init__( # Determine whether to use Unix sockets use_unix_sockets = platform.system() != "Windows" and hostaddr is None - # Use INITDB_TEMPLATE if available (much faster than running initdb) - initdb_template = os.environ.get("INITDB_TEMPLATE") - if initdb_template and os.path.isdir(initdb_template): - shutil.copytree(initdb_template, datadir) - else: - if platform.system() == "Windows": - auth_method = "trust" - else: - auth_method = "peer" - run( - bindir / "initdb", - "--no-sync", - "--auth", - auth_method, - "--pgdata", - self.datadir, - ) + # Initialize the data directory: from a base backup, an initdb template + # (much faster), or a fresh initdb. + if no_data_checksums: + extra = (extra or []) + ["--no-data-checksums"] + self._init_datadir( + from_backup, + extra, + combine_with_prior, + combine_mode, + force_initdb, + tablespace_map, + tar_program, + ) + if from_backup is None and auth_extra: + self._config_auth(auth_extra) # Figure out a port to listen on. Attempt to reserve both IPv4 and IPv6 # addresses in one go. 
@@ -224,11 +262,46 @@ def __init__( else: self.host = hostaddr - with open(os.path.join(datadir, "postgresql.conf"), "a") as f: + self._write_base_config( + use_unix_sockets, addrs, port, allows_streaming, from_backup + ) + + if has_archiving: + self._enable_archiving() + + # Between closing of the socket, s, and server start, we're racing + # against anything that wants to open up ephemeral ports, so try not to + # put any new work here. + + temp_sock.close() + + # Initializing from a backup: optionally turn this into a streaming + # standby of the backup's source server, and/or a restoring standby + # that fetches WAL from the source's archive. + if has_streaming: + assert from_backup is not None # guaranteed by the check above + self._enable_streaming(from_backup[0]) + if has_restoring: + assert from_backup is not None # guaranteed by the check above + self._enable_restoring(from_backup[0], standby) + + def _write_base_config( + self, use_unix_sockets, addrs, port, allows_streaming, from_backup=None + ): + """Append the test server's base configuration to postgresql.conf. + + For a node initialized from a backup only the connection-identity + settings (socket/listen/port) are written: the policy settings + (logging, fsync, restart_after_crash, ...) are inherited from the backup + and may have been intentionally overridden on the source, mirroring + PostgreSQL::Test::Cluster->init_from_backup (which rewrites only port and + listen_addresses/unix_socket_directories). 
+ """ + with open(self.datadir / "postgresql.conf", "a", encoding="utf-8") as f: print(file=f) if use_unix_sockets: print( - "unix_socket_directories = '{}'".format(sockdir.as_posix()), + "unix_socket_directories = '{}'".format(self.sockdir.as_posix()), file=f, ) else: @@ -236,30 +309,485 @@ def __init__( print("unix_socket_directories = ''", file=f) print("listen_addresses = '{}'".format(",".join(addrs)), file=f) print("port =", port, file=f) + if from_backup is not None: + return print("log_connections = all", file=f) print("fsync = off", file=f) print("datestyle = 'ISO'", file=f) print("timezone = 'UTC'", file=f) - # Between closing of the socket, s, and server start, we're racing - # against anything that wants to open up ephemeral ports, so try not to - # put any new work here. + # Logging settings mirroring PostgreSQL::Test::Cluster->init, so + # that statement-log assertions (issues_sql_like) and replication + # behave the same as in the Perl suite. + print("log_statement = all", file=f) + print("log_replication_commands = on", file=f) + print("log_line_prefix = '%m %b[%p] %q%a '", file=f) + print("restart_after_crash = off", file=f) + print("wal_retrieve_retry_interval = '500ms'", file=f) + + if allows_streaming: + wal_level = "logical" if allows_streaming == "logical" else "replica" + print("wal_level = {}".format(wal_level), file=f) + print("max_wal_senders = 10", file=f) + print("max_replication_slots = 10", file=f) + print("wal_log_hints = on", file=f) + print("hot_standby = on", file=f) + print("max_wal_size = 128MB", file=f) + + def _init_datadir( + self, + from_backup, + extra=None, + combine_with_prior=None, + combine_mode=None, + force_initdb=False, + tablespace_map=None, + tar_program=None, + ): + """Populate the data directory from a backup, a template, or initdb. 
- temp_sock.close() + When extra initdb options are given, a fresh initdb is always run + (the cached template may be incompatible), mirroring the force_initdb + behavior of PostgreSQL::Test::Cluster->init. + """ + if from_backup is not None: + source, backup_name = from_backup + if combine_with_prior: + # Reconstruct a full data directory from a chain of prior + # (full/incremental) backups plus this one, via + # pg_combinebackup (mirrors init_from_backup combine_with_prior). + inputs = [ + str(source.backup_path(prior)) for prior in combine_with_prior + ] + inputs.append(str(source.backup_path(backup_name))) + extra_combine = [combine_mode] if combine_mode else [] + ts_args = [ + "-T{}={}".format(old, new) + for old, new in (tablespace_map or {}).items() + ] + run( + self._bindir / "pg_combinebackup", + *inputs, + *ts_args, + *extra_combine, + "-o", + self.datadir, + ) + elif tar_program: + self._restore_tar_backup( + source.backup_path(backup_name), tar_program, tablespace_map + ) + elif tablespace_map: + self._copy_backup_with_tablespaces( + source.backup_path(backup_name), tablespace_map + ) + else: + shutil.copytree(source.backup_path(backup_name), self.datadir) + # A backup carries the source's postmaster.pid/standby state; remove + # anything that would confuse a fresh start. + for leftover in ("postmaster.pid", "standby.signal", "recovery.signal"): + (self.datadir / leftover).unlink(missing_ok=True) + return - def start(self): - """Start the server using pg_ctl.""" - self.pg_ctl("start") + initdb_template = os.environ.get("INITDB_TEMPLATE") + if ( + initdb_template + and os.path.isdir(initdb_template) + and not extra + and not force_initdb + ): + shutil.copytree(initdb_template, self.datadir) + else: + # Match Cluster.pm and the initdb template: trust auth for local + # connections (the template-copy path above is already trust). 
+ run( + self._bindir / "initdb", + "--no-sync", + "--auth", + "trust", + "--pgdata", + self.datadir, + *(extra or []), + ) + + def _copy_backup_with_tablespaces(self, backup_path, tablespace_map): + """Copy a base backup, relocating mapped tablespaces and writing + tablespace_map (mirrors Cluster->init_from_backup's plain-copy path). + + tablespace_map maps a tablespace OID (the pg_tblspc/ entry) to the + new directory the tablespace should live in. Mapped tablespace links are + skipped during the main copy, copied to their new homes, and recorded in + the data directory's tablespace_map file. + """ + backup_path = pathlib.Path(backup_path) + seen_tsoids = [] + + def _ignore(directory, names): + ignored = [] + rel = pathlib.Path(directory).relative_to(backup_path) + if str(rel) == "pg_tblspc": + for name in names: + if name in tablespace_map: + seen_tsoids.append(name) + ignored.append(name) + return ignored + + shutil.copytree(backup_path, self.datadir, ignore=_ignore) + if not seen_tsoids: + return + with open(self.datadir / "tablespace_map", "w", encoding="utf-8") as tsmap: + for tsoid in seen_tsoids: + olddir = backup_path / "pg_tblspc" / tsoid + newdir = tablespace_map[tsoid] + shutil.copytree(olddir, newdir) + tsmap.write("{} {}\n".format(tsoid, newdir)) + + def _restore_tar_backup(self, backup_path, tar_program, tablespace_map): + """Restore a tar-format base backup into the data directory. + + Mirrors PostgreSQL::Test::Cluster->init_from_backup's tar_program path: + extract base.tar into the data dir and pg_wal.tar into pg_wal, then + extract each numbered tablespace tar into its mapped directory and + record it in the data directory's tablespace_map file. tablespace_map + maps a tablespace OID (the tar's base name) to the directory it should + be restored into. 
+ """ + backup_path = pathlib.Path(backup_path) + tablespace_map = tablespace_map or {} + self.datadir.mkdir(parents=True) + run(tar_program, "xf", backup_path / "base.tar", "-C", self.datadir) + run( + tar_program, + "xf", + backup_path / "pg_wal.tar", + "-C", + self.datadir / "pg_wal", + ) + tstars = sorted( + name for name in os.listdir(backup_path) if re.match(r"^\d+\.tar", name) + ) + with open(self.datadir / "tablespace_map", "w", encoding="utf-8") as tsmap: + for tstar in tstars: + tsoid = re.sub(r"\.tar$", "", tstar) + if tsoid not in tablespace_map: + raise RuntimeError("no tablespace mapping for {}".format(tstar)) + newdir = tablespace_map[tsoid] + os.mkdir(newdir) + run(tar_program, "xf", backup_path / tstar, "-C", newdir) + escaped_newdir = str(newdir).replace("\\", "\\\\") + tsmap.write("{} {}\n".format(tsoid, escaped_newdir)) + + def _config_auth(self, auth_extra): + """Run pg_regress --config-auth on the data dir (mirrors init auth_extra). + + Sets up authentication (e.g. extra ident-mapped roles) so the test's OS + user can connect as those roles. Requires PG_REGRESS in the environment. + """ + pg_regress = os.environ.get("PG_REGRESS") + if not pg_regress: + return + run( + pg_regress, + "--config-auth", + self.datadir, + *(str(opt) for opt in auth_extra), + ) + def start(self, fail_ok=False): + """Start the server using pg_ctl. Returns True on success; with + fail_ok, returns False instead of raising if pg_ctl reports failure.""" + # Set cluster_name at startup (not in postgresql.conf) so it is not + # copied to standbys via backup, mirroring Cluster->start. walreceiver + # uses cluster_name as its application_name in pg_stat_replication. + try: + self.pg_ctl("--options", "--cluster-name={}".format(self.name), "start") + except subprocess.CalledProcessError as exc: + if fail_ok: + return False + # pg_ctl's own output rarely says why startup failed; include the + # server log, which holds the actual startup error. 
+ raise RuntimeError( + 'pg_ctl start failed for node "{}":\n--- {} ---\n{}'.format( + self.name, self.log, self._log_text() + ) + ) from exc # Read the PID file to get the postmaster PID - with open(os.path.join(self.datadir, "postmaster.pid")) as f: + with open(os.path.join(self.datadir, "postmaster.pid"), encoding="utf-8") as f: self.pid = int(f.readline().strip()) + return True + + def _log_text(self): + """Return the whole server log as text, normalizing CRLF/CR to LF. + + Log offsets are character positions in this normalized text (see + current_log_position), not raw byte counts: on Windows the log uses CRLF + line endings, and a byte offset would overshoot the folded text and skip + past the lines being checked. + + Co-authored-by: Andrew Dunstan + """ + if not self.log.exists(): + return "" + with open(self.log, encoding="utf-8", errors="replace") as fh: + text = fh.read() + return text.replace("\r\n", "\n").replace("\r", "\n") def current_log_position(self): - """Get the current end position of the log file.""" - if self.log.exists(): - return self.log.stat().st_size - return 0 + """Get the current end position of the log, as a character offset. + + Character length of the CRLF-normalized log text (not the raw byte + size), so it slices log text consistently on Windows. + """ + return len(self._log_text()) + + def is_alive(self): + """Return True if the server answers pg_isready (mirrors Cluster->is_alive).""" + result = self.bin.run_command( + ["pg_isready", "--host", str(self.host), "--port", str(self.port)] + ) + return result.rc == 0 + + def advance_wal(self, num): + """Advance the WAL by num segments (cf. Cluster->advance_wal). + + Each iteration emits a logical message and forces a WAL switch, which + flushes WAL and moves to a fresh segment. 
+ """ + for _ in range(num): + self.safe_psql( + "SELECT pg_logical_emit_message(false, '', 'foo');\n" + "SELECT pg_switch_wal();" + ) + + def emit_wal(self, size): + """Emit a logical message of size bytes; return its end LSN as an int. + + Mirrors PostgreSQL::Test::Cluster->emit_wal. + """ + return int( + self.safe_psql( + "SELECT pg_logical_emit_message(true, '', repeat('a', {})) " + "- '0/0'".format(size) + ) + ) + + def _get_insert_lsn(self): + """Current WAL insert LSN as an int offset from 0/0.""" + return int(self.safe_psql("SELECT pg_current_wal_insert_lsn() - '0/0'")) + + def advance_wal_out_of_record_splitting_zone(self, wal_block_size): + """Emit WAL until the insert LSN is clear of the page-boundary zone. + + Mirrors Cluster->advance_wal_out_of_record_splitting_zone: keeps the + insert pointer at least a quarter-page away from the end of the current + WAL page so a following record will not be split across pages. + """ + page_threshold = wal_block_size // 4 + end_lsn = self._get_insert_lsn() + page_offset = end_lsn % wal_block_size + while page_offset >= wal_block_size - page_threshold: + self.emit_wal(page_threshold) + end_lsn = self._get_insert_lsn() + page_offset = end_lsn % wal_block_size + return end_lsn + + def advance_wal_to_record_splitting_zone(self, wal_block_size): + """Emit WAL until the insert LSN is near a page boundary. + + Mirrors Cluster->advance_wal_to_record_splitting_zone: positions the + insert pointer within a record-header's width of the page end, so a + following record header straddles the page boundary. 
+ """ + record_header_size = 24 + end_lsn = self._get_insert_lsn() + page_offset = end_lsn % wal_block_size + while page_offset <= wal_block_size - 512: + self.emit_wal(wal_block_size - page_offset - 256) + end_lsn = self._get_insert_lsn() + page_offset = end_lsn % wal_block_size + message_size = wal_block_size - 80 + while page_offset <= wal_block_size - record_header_size: + self.emit_wal(message_size) + end_lsn = self._get_insert_lsn() + old_offset = page_offset + page_offset = end_lsn % wal_block_size + delta = page_offset - old_offset + if delta > 8: + message_size -= 8 + elif delta <= 0: + message_size += 8 + return end_lsn + + def write_wal(self, tli, lsn, segment_size, data): + """Overwrite bytes at lsn in the WAL segment file; return its path. + + Mirrors Cluster->write_wal: locates the segment containing lsn, seeks to + the in-segment offset, and writes data there (raw bytes). + """ + segment = lsn // segment_size + offset = lsn % segment_size + path = self.datadir / "pg_wal" / "{:08X}{:08X}{:08X}".format(tli, 0, segment) + with open(path, "r+b") as fh: + fh.seek(offset) + fh.write(data) + return str(path) + + def dump_info(self): + """Print basic node info for debugging (cf. Cluster->dump_info).""" + eprint( + "# Node {!r}: host={} port={} datadir={}".format( + self.name, self.host, self.port, self.datadir + ) + ) + + def slot(self, slot_name): + """Return a dict of this slot's pg_replication_slots fields. + + Mirrors PostgreSQL::Test::Cluster->slot: an unknown slot yields + empty-string values for every column. 
+ """ + columns = [ + "plugin", + "slot_type", + "datoid", + "database", + "active", + "active_pid", + "xmin", + "catalog_xmin", + "restart_lsn", + ] + row = self.safe_psql( + "SELECT {} FROM pg_catalog.pg_replication_slots " + "WHERE slot_name = '{}'".format(", ".join(columns), slot_name) + ) + values = row.split("|") if row != "" else [""] * len(columns) + return dict(zip(columns, values)) + + def pg_recvlogical_upto( + self, dbname, slot_name, endpos, timeout_secs, options=None + ): + """Stream a logical slot up to endpos and return pg_recvlogical's stdout. + + Mirrors PostgreSQL::Test::Cluster->pg_recvlogical_upto (scalar context): + runs pg_recvlogical --no-loop --start to the given end LSN, applying any + plugin options (a name->value dict), raising on a non-zero exit. + """ + cmd = [ + str(self._bindir / "pg_recvlogical"), + "--slot", + slot_name, + "--dbname", + self.connstr(dbname), + "--endpos", + str(endpos), + "--file", + "-", + "--no-loop", + "--start", + ] + for key, value in (options or {}).items(): + if "=" in key: + raise ValueError("= not permitted in replication option name") + cmd += ["--option", "{}={}".format(key, value)] + proc = subprocess.run( + cmd, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + encoding="utf-8", + errors="replace", + env=self._connenv(), + timeout=timeout_secs, + check=False, + ) + if proc.returncode != 0: + raise RuntimeError( + "pg_recvlogical exited with {}, stdout {!r} stderr {!r}".format( + proc.returncode, proc.stdout, proc.stderr + ) + ) + return proc.stdout + + def wait_for_slot_catchup(self, slot_name, mode="restart", target_lsn=None): + """Wait until slot_name's _lsn passes target_lsn. + + Mirrors Cluster->wait_for_slot_catchup. mode is 'restart' or + 'confirmed_flush'. 
+ """ + if mode not in ("restart", "confirmed_flush"): + raise ValueError("valid modes are restart, confirmed_flush") + if target_lsn is None: + raise ValueError("target lsn must be specified") + assert self.poll_query_until( + "SELECT '{}' <= {}_lsn FROM pg_catalog.pg_replication_slots " + "WHERE slot_name = '{}';".format(target_lsn, mode, slot_name) + ), "timed out waiting for catchup" + + def validate_slot_inactive_since(self, slot_name, reference_time): + """Return slot_name's inactive_since after sanity-checking it. + + Mirrors Cluster->validate_slot_inactive_since: the captured + inactive_since must be later than the epoch and than reference_time. + """ + inactive_since = self.safe_psql( + "SELECT inactive_since FROM pg_replication_slots\n" + " WHERE slot_name = '{}' AND inactive_since IS NOT NULL;".format( + slot_name + ) + ) + assert ( + self.safe_psql( + "SELECT '{since}'::timestamptz > to_timestamp(0) AND\n" + " '{since}'::timestamptz > '{ref}'::timestamptz;".format( + since=inactive_since, ref=reference_time + ) + ) + == "t" + ), "last inactive time for slot {} is valid".format(slot_name) + return inactive_since + + def log_standby_snapshot(self, standby, slot_name): + """Emit an xl_running_xacts record the standby's logical slot waits for. + + Mirrors Cluster->log_standby_snapshot: wait until the standby slot's + restart_lsn is determined, then call pg_log_standby_snapshot() on self + (the primary) so the standby can advance the slot. + """ + assert standby.poll_query_until( + "SELECT restart_lsn IS NOT NULL\n" + "FROM pg_catalog.pg_replication_slots WHERE slot_name = '{}'".format( + slot_name + ) + ), "timed out waiting for logical slot to calculate its restart_lsn" + self.safe_psql("SELECT pg_log_standby_snapshot()") + + def create_logical_slot_on_standby(self, primary, slot_name, dbname): + """Create a logical slot on this standby, coordinated with primary. 
+ + Mirrors Cluster->create_logical_slot_on_standby: starts pg_recvlogical + --create-slot in the background, has primary log a standby snapshot so + the slot can compute its restart_lsn, then verifies the slot is logical. + """ + proc = subprocess.Popen( # pylint: disable=consider-using-with + [ + str(self._bindir / "pg_recvlogical"), + "--dbname", + self.connstr(dbname), + "--plugin", + "test_decoding", + "--slot", + slot_name, + "--create-slot", + ], + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + env=self._connenv(), + ) + primary.log_standby_snapshot(self, slot_name) + proc.wait() + assert ( + self.slot(slot_name)["slot_type"] == "logical" + ), "{} on standby created".format(slot_name) def reset_log_position(self): """Mark current log position as start for log_content().""" @@ -286,12 +814,782 @@ def sql(self, query): with self.connect() as conn: return conn.sql(query) + def psql_capture( + self, + query, + dbname="postgres", + on_error_stop=True, + replication=None, + extra_params=None, + connstr=None, + timeout=None, + ): + """ + Run psql with query piped on stdin and return CommandResult(rc, stdout, + stderr) without raising. Mirrors PostgreSQL::Test::Cluster->psql in list + context: --no-psqlrc --no-align --tuples-only --quiet, ON_ERROR_STOP by + default (a SQL error then yields exit code 3), with an optional + replication connection. extra_params are appended to the psql command + line (e.g. ['--username', 'someuser']). A connstr overrides the --dbname + target (libpq merges it with PGHOST/PGPORT from the environment). Use it + to assert on psql's own stdout/stderr/exit code. 
+ """ + if connstr is None: + connstr = self.dbname_connstr(dbname) + if replication is not None: + connstr += " replication={}".format(replication) + cmd = [ + str(self._bindir / "psql"), + "--no-psqlrc", + "--no-align", + "--tuples-only", + "--quiet", + "--dbname", + connstr, + "--file", + "-", + ] + if on_error_stop: + cmd += ["--set", "ON_ERROR_STOP=1"] + if extra_params: + cmd += [str(p) for p in extra_params] + proc = subprocess.run( + cmd, + input=query, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + encoding="utf-8", + errors="replace", + env=self._connenv(), + check=False, + timeout=timeout, + ) + # Match Cluster->psql, which chomps a single trailing newline off each. + stdout = proc.stdout[:-1] if proc.stdout.endswith("\n") else proc.stdout + stderr = proc.stderr[:-1] if proc.stderr.endswith("\n") else proc.stderr + return CommandResult(proc.returncode, stdout, stderr) + + def safe_psql( + self, query, dbname="postgres", timeout=None, extra_env=None, connstr=None + ): + """ + Execute query via psql and return its trimmed stdout, raising on error. + Mirrors PostgreSQL::Test::Cluster->safe_psql: the SQL is piped to psql + (so multiple statements run separately, e.g. CREATE DATABASE works), in + tuples-only unaligned mode with ON_ERROR_STOP. An optional timeout (in + seconds) bounds the psql invocation; extra_env adds/overrides connection + environment variables (e.g. PGOPTIONS, PGUSER). A connstr overrides the + --dbname target (merged with PGHOST/PGPORT from the environment), used + by the SSL tests to pick a specific cert/host combination. 
+ """ + if connstr is None: + connstr = self.dbname_connstr(dbname) + cmd = [ + str(self._bindir / "psql"), + "--no-psqlrc", + "--no-align", + "--tuples-only", + "--quiet", + "--set", + "ON_ERROR_STOP=1", + "--dbname", + connstr, + ] + proc = subprocess.run( + cmd, + input=query, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + encoding="utf-8", + errors="replace", + env=self._connenv(**(extra_env or {})), + check=False, + timeout=timeout, + ) + if proc.returncode != 0: + raise subprocess.CalledProcessError( + proc.returncode, cmd, proc.stdout, proc.stderr + ) + return proc.stdout.rstrip("\n") + + def check_extension(self, extname): + """Return True if extname is available (in pg_available_extensions). + + Mirrors PostgreSQL::Test::Cluster->check_extension. + """ + return ( + self.safe_psql( + "SELECT count(*) > 0 FROM pg_available_extensions " + "WHERE name = '{}'".format(extname) + ) + == "t" + ) + + def config_data(self, *args): + """Run pg_config from this cluster's install and return its output. + + Mirrors PostgreSQL::Test::Cluster->config_data: with a single option + like '--bindir' the matching value is returned (trailing newline + stripped); with no arguments the full pg_config output is returned. + """ + cmd = [str(self._bindir / "pg_config")] + [str(a) for a in args] + proc = subprocess.run( + cmd, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + encoding="utf-8", + errors="replace", + check=True, + ) + return proc.stdout.rstrip("\n") + + def _check_log_patterns(self, test_name, offset, log_like, log_unlike): + """Assert the server log (from offset onward) matches log_like patterns + and matches none of the log_unlike patterns. + + Mirrors the log_like/log_unlike handling of + PostgreSQL::Test::Cluster->connect_ok/connect_fails: each pattern is a + regex applied to the log text emitted since `offset`. Because the + backend writes its log asynchronously, this polls (up to the test + timeout) until the log_like patterns appear before asserting. 
+ """ + if not log_like and not log_unlike: + return + deadline = ( + self._remaining_timeout_fn() + if self._remaining_timeout_fn is not None + else test_timeout_default() + ) + end = time.monotonic() + deadline + log = "" + while True: + log = slurp_file(self.log, offset) if self.log.exists() else "" + if all(re.search(p, log) for p in (log_like or [])): + break + if time.monotonic() >= end: + break + time.sleep(0.1) + for pattern in log_like or []: + assert re.search(pattern, log), "{}: log matches {!r}\nlog:\n{}".format( + test_name, pattern, log + ) + for pattern in log_unlike or []: + assert not re.search( + pattern, log + ), "{}: log unexpectedly matches {!r}\nlog:\n{}".format( + test_name, pattern, log + ) + + def raw_connect(self): + """Open a raw socket to the server's listening endpoint. + + Mirrors PostgreSQL::Test::Cluster->raw_connect: a connected stream socket + to the Unix-domain socket (or TCP host:port) with no protocol + negotiation. The caller drives the wire protocol and closes it. + """ + if platform.system() != "Windows" and not str(self.host).startswith( + ("127.", "::1") + ): + sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM) + sock.connect("{}/.s.PGSQL.{}".format(self.host, self.port)) + else: + sock = socket.create_connection((str(self.host), self.port)) + return sock + + def raw_connect_works(self): + """Return True if raw_connect() works on this platform. + + Mirrors PostgreSQL::Test::Cluster->raw_connect_works. + """ + try: + self.raw_connect().close() + except OSError: + return False + return True + + def connect_ok( + self, + connstr, + test_name, + sql=None, + expected_stdout=None, + expected_stderr=None, + log_like=None, + log_unlike=None, + ): + """Assert that a connection with connstr succeeds. 
+ + Mirrors PostgreSQL::Test::Cluster->connect_ok: psql connects with the + given connstr (merged with PGHOST/PGPORT from the environment) and -w + (never prompt for a password), runs sql (default a trivial SELECT), and + must exit 0. Optionally the stdout must match expected_stdout; stderr + must match expected_stderr if given, else be empty. log_like/log_unlike + are lists of regexes that must (respectively must not) match the server + log emitted during the connection attempt. + """ + if sql is None: + sql = "SELECT $$connected with {}$$".format(connstr) + offset = self.current_log_position() + result = self.psql_capture( + sql, connstr=connstr, extra_params=["-w"], on_error_stop=False + ) + assert result.rc == 0, "{}: exit {}\n{}".format( + test_name, result.rc, result.stderr + ) + if expected_stdout is not None: + assert re.search( + expected_stdout, result.stdout + ), "{}: stdout matches {!r}, got {!r}".format( + test_name, expected_stdout, result.stdout + ) + if expected_stderr is not None: + assert re.search( + expected_stderr, result.stderr + ), "{}: stderr matches {!r}, got {!r}".format( + test_name, expected_stderr, result.stderr + ) + else: + assert result.stderr == "", "{}: no stderr, got {!r}".format( + test_name, result.stderr + ) + self._check_log_patterns(test_name, offset, log_like, log_unlike) + + def connect_fails( + self, connstr, test_name, expected_stderr=None, log_like=None, log_unlike=None + ): + """Assert that a connection with connstr fails. + + Mirrors PostgreSQL::Test::Cluster->connect_fails: psql connects with the + given connstr and -w but no SQL, and must exit non-zero. Optionally the + stderr must match expected_stderr. log_like/log_unlike are lists of + regexes that must (respectively must not) match the server log emitted + during the connection attempt. 
+ """ + offset = self.current_log_position() + result = self.psql_capture( + "", connstr=connstr, extra_params=["-w"], on_error_stop=False + ) + assert result.rc != 0, "{}: expected non-zero exit\n{}".format( + test_name, result.stdout + ) + if expected_stderr is not None: + assert re.search( + expected_stderr, result.stderr + ), "{}: stderr matches {!r}, got {!r}".format( + test_name, expected_stderr, result.stderr + ) + self._check_log_patterns(test_name, offset, log_like, log_unlike) + + def wait_for_event(self, backend_type, wait_event_name): + """Poll until a backend of backend_type is waiting on wait_event_name. + Mirrors PostgreSQL::Test::Cluster->wait_for_event. + """ + ok = self.poll_query_until( + "SELECT count(*) > 0 FROM pg_stat_activity " + "WHERE backend_type = '{}' AND wait_event = '{}'".format( + backend_type, wait_event_name + ) + ) + if not ok: + raise AssertionError( + "timed out waiting for event {!r} on backend {!r}".format( + wait_event_name, backend_type + ) + ) + + def poll_query_until(self, query, expected="t", dbname="postgres"): + """ + Run query via psql repeatedly until its trimmed output equals expected + (default "t") with empty stderr, or the timeout elapses. Returns True + on success, False on timeout. Mirrors + PostgreSQL::Test::Cluster->poll_query_until. 
+ """ + cmd = [ + str(self._bindir / "psql"), + "--no-psqlrc", + "--no-align", + "--tuples-only", + "--dbname", + self.dbname_connstr(dbname), + ] + max_attempts = 10 * test_timeout_default() + stdout = stderr = "" + for _ in range(max_attempts): + proc = subprocess.run( + cmd, + input=query, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + encoding="utf-8", + errors="replace", + env=self._connenv(), + check=False, + ) + stdout = proc.stdout.strip() + stderr = proc.stderr.strip() + if stdout == expected and stderr == "": + return True + time.sleep(0.1) + + eprint( + "poll_query_until timed out:\nquery: {}\nexpected: {}\n" + "last stdout: {}\nlast stderr: {}".format(query, expected, stdout, stderr) + ) + return False + + def connstr(self, dbname=None): + """Return a libpq connection string for this server. + + No inner quoting is applied (matching how the Perl suite embeds a + connstr in primary_conninfo='...' / CONNECTION '...'), so the result + can be nested inside a single-quoted string without escaping. Helpers + that hand the dbname to psql (safe_psql/psql_capture/poll_query_until) + escape it themselves via dbname_connstr. + """ + parts = ["host={}".format(self.host), "port={}".format(self.port)] + if dbname: + parts.append("dbname={}".format(dbname)) + return " ".join(parts) + + def dbname_connstr(self, dbname): + """Return a standalone connstr targeting dbname, with the name escaped. + + Mirrors PostgreSQL::Test::Cluster->connstr(dbname): the database name is + single-quoted with backslashes and single quotes escaped, so a name + containing spaces, quotes, or backslashes forms a valid connection + string when passed as one argument (psql --dbname, or a client tool's + connection-string option such as pg_createsubscriber --publisher-server). + Unlike connstr(), the result must not be nested inside another + single-quoted string. 
+ """ + escaped = dbname.replace("\\", "\\\\").replace("'", "\\'") + return "host={} port={} dbname='{}'".format(self.host, self.port, escaped) + + def append_conf(self, text, filename="postgresql.conf"): + """Append text (plus a trailing newline) to a file in the data dir. + + Mirrors PostgreSQL::Test::Cluster->append_conf. + """ + append_to_file(self.datadir / filename, text + "\n") + + def adjust_conf( + self, setting, value, filename="postgresql.conf", skip_equals=False + ): + """Rewrite a config file, replacing or removing a setting in place. + + Mirrors PostgreSQL::Test::Cluster->adjust_conf: every line that sets + `setting` is dropped; if `value` is not None a single new line setting + it is written in its place (other lines preserved). The file mode is + reset to match the data dir's group accessibility. + """ + conffile = self.datadir / filename + eq = "" if skip_equals else "= " + result = [] + for line in slurp_file(conffile).split("\n"): + if not re.match(r"^{}\W".format(re.escape(setting)), line): + if line != "": + result.append(line + "\n") + elif value is not None: + result.append("{} {}{}\n".format(setting, eq, value)) + with open(conffile, "w", encoding="utf-8") as fh: + fh.write("".join(result)) + os.chmod(conffile, self._signal_file_mode()) + + def checksum_enable_offline(self): + """Enable data checksums on the stopped cluster (pg_checksums -e).""" + result = self.bin.run_command(["pg_checksums", "-D", str(self.datadir), "-e"]) + assert result.rc == 0, "pg_checksums -e failed: {}".format(result.stderr) + + def checksum_disable_offline(self): + """Disable data checksums on the stopped cluster (pg_checksums -d).""" + result = self.bin.run_command(["pg_checksums", "-D", str(self.datadir), "-d"]) + assert result.rc == 0, "pg_checksums -d failed: {}".format(result.stderr) + + def backup_path(self, backup_name): + """Return the path where backup_name is (or would be) stored.""" + return self._backup_root / backup_name + + @property + def 
backup_dir(self): + """The directory holding this server's backups (cf. Cluster->backup_dir). + + Created on demand so server-side backup targets (pg_basebackup --target + server:DIR/...) can write into it immediately, as in the Perl suite. + """ + self._backup_root.mkdir(parents=True, exist_ok=True) + return self._backup_root + + @property + def basedir(self): + """The directory that contains this server's data dir (cf. Cluster).""" + return pathlib.Path(self.datadir).parent + + @property + def archive_dir(self): + """The WAL archive directory for this server (cf. Cluster).""" + return self.basedir / "archives" + + @staticmethod + def _file_copy_command(src, dst): + """A shell command that copies file src to dst. + + src/dst may embed the archive/restore %p/%f placeholders. On Windows + use cmd's ``copy`` with backslash paths; elsewhere ``cp``. + + Co-authored-by: Andrew Dunstan + """ + if WINDOWS_OS: + return 'copy "{}" "{}"'.format( + str(src).replace("/", "\\"), str(dst).replace("/", "\\") + ) + return 'cp "{}" "{}"'.format(src, dst) + + def _enable_archiving(self): + """Create the archive directory and turn on WAL archiving.""" + self.archive_dir.mkdir(parents=True, exist_ok=True) + copy_command = self._file_copy_command("%p", "{}/%f".format(self.archive_dir)) + self.append_conf( + "archive_mode = on\narchive_command = '{}'".format(copy_command) + ) + + def enable_archiving(self): + """Enable WAL archiving on this (stopped) server. + + Mirrors PostgreSQL::Test::Cluster->enable_archiving. + """ + self._enable_archiving() + + def corrupt_page_checksum(self, file, page_offset): + """ + Flip the pd_checksum field of the page at page_offset in a relation + file (relative to the data dir), mirroring + PostgreSQL::Test::Cluster->corrupt_page_checksum. + """ + path = self.datadir / file + with open(path, "r+b") as fh: + fh.seek(page_offset) + header = bytearray(fh.read(24)) + # pd_checksum is a 2-byte field at offset 8 in PageHeaderData. 
+ header[8] ^= 0xFF + header[9] ^= 0xFF + fh.seek(page_offset) + fh.write(header) + + def backup(self, backup_name, backup_options=None): + """ + Take a base backup of this running server with pg_basebackup. Mirrors + PostgreSQL::Test::Cluster->backup. backup_options are extra + pg_basebackup arguments (e.g. --incremental). Returns the backup path. + """ + path = self.backup_path(backup_name) + self._backup_root.mkdir(parents=True, exist_ok=True) + run( + self._bindir / "pg_basebackup", + "--no-sync", + "--pgdata", + path, + "--host", + self.host, + "--port", + str(self.port), + "--checkpoint", + "fast", + *(str(opt) for opt in (backup_options or [])), + env=self._connenv(), + ) + return path + + def backup_fs_cold(self, backup_name): + """ + Filesystem-level cold backup of a stopped server, excluding log/ and + postmaster.pid (mirrors PostgreSQL::Test::Cluster->backup_fs_cold). + """ + path = self.backup_path(backup_name) + self._backup_root.mkdir(parents=True, exist_ok=True) + shutil.copytree( + self.datadir, + path, + ignore=shutil.ignore_patterns("log", "postmaster.pid"), + ) + return path + + def reload(self): + """Reload server configuration via pg_ctl reload.""" + self.pg_ctl("reload") + + def background_psql( + self, + dbname="postgres", + on_error_stop=True, + replication=None, + extra_params=None, + timeout=None, + wait=True, + tuples_only=True, + quiet=True, + ) -> BackgroundPsql: + """ + Start an interactive psql session in the background, mirroring + PostgreSQL::Test::Cluster->background_psql. Close it with .quit(). + + tuples_only/quiet default True (the Perl -XAtq form). Set both False to + see command tags and row-count footers (e.g. 'UPDATE 1', '(1 row)') in + the session output, as some visibility tests match on those. 
+ """ + connstr = self.dbname_connstr(dbname) + if replication is not None: + connstr += " replication={}".format(replication) + cmd = [ + str(self._bindir / "psql"), + "--no-psqlrc", + "--no-align", + ] + if tuples_only: + cmd.append("--tuples-only") + if quiet: + cmd.append("--quiet") + cmd += [ + "--dbname", + connstr, + "--file", + "-", + ] + if on_error_stop: + cmd += ["--set", "ON_ERROR_STOP=1"] + if extra_params: + cmd += extra_params + return BackgroundPsql(cmd, self._connenv(), timeout=timeout, wait=wait) + + def interactive_psql(self, dbname="postgres", history_file=None, extra_params=None): + """Start a PTY-backed interactive psql session. + + Mirrors PostgreSQL::Test::Cluster->interactive_psql: psql runs on a + pseudo-terminal so it believes it is interactive (readline/libedit + enabled for tab-completion and line-editing tests). PSQL_HISTORY/INPUTRC + are redirected and TERM/LS_COLORS unset for deterministic output. + """ + env = self._connenv() + env["PSQL_HISTORY"] = history_file or "/dev/null" + env["INPUTRC"] = "/dev/null" + env.pop("TERM", None) + env.pop("LS_COLORS", None) + cmd = [ + str(self._bindir / "psql"), + "--no-psqlrc", + "--no-align", + "--tuples-only", + "--dbname", + self.dbname_connstr(dbname), + ] + if extra_params: + cmd += [str(p) for p in extra_params] + return InteractivePsql(cmd, env) + + def _enable_streaming(self, source): + """Configure this server as a streaming standby of source.""" + self.append_conf("primary_conninfo='{}'".format(source.connstr())) + (self.datadir / "standby.signal").touch() + + def enable_streaming(self, source): + """Configure this (stopped) server as a streaming standby of source. + + Mirrors PostgreSQL::Test::Cluster->enable_streaming; used to re-stream a + demoted former primary after a failover role swap. + """ + self._enable_streaming(source) + + def enable_restoring(self, source, standby=True): + """Configure this (stopped) server to restore WAL from source's archive. 
+ + Mirrors PostgreSQL::Test::Cluster->enable_restoring. + """ + self._enable_restoring(source, standby) + + def _enable_restoring(self, source, standby=True): + """Configure this server to restore WAL from source's archive. + + With standby=True a standby.signal is placed (standby mode); otherwise a + recovery.signal is placed (recovery mode), mirroring init_from_backup's + has_restoring/standby parameters. + """ + restore_command = self._file_copy_command( + "{}/%f".format(source.archive_dir), "%p" + ) + self.append_conf("restore_command = '{}'".format(restore_command)) + signal = "standby.signal" if standby else "recovery.signal" + sig = self.datadir / signal + sig.touch() + sig.chmod(self._signal_file_mode()) + + def logrotate(self): + """Request a log rotation via pg_ctl logrotate.""" + self.pg_ctl("logrotate") + + def promote(self): + """Promote a standby via pg_ctl promote.""" + self.pg_ctl("promote") + + def teardown_node(self, fail_ok=False): + """Stop the node (mirrors Cluster->teardown_node).""" + self.stop("immediate" if fail_ok else "fast") + + def clean_node(self): + """Stop the node and remove its data directory (cf. Cluster->clean_node). + + Frees the data dir so a node of the same name can be re-created from a + fresh backup, as some streaming tests do. + """ + self.stop() + if self.datadir.exists(): + shutil.rmtree(self.datadir) + + def restart(self, mode="fast", fail_ok=False, log_like=None, log_unlike=None): + """Restart the server via pg_ctl restart and refresh the postmaster PID. + + Mirrors PostgreSQL::Test::Cluster->restart. With fail_ok=True a failed + restart returns False (1 in Perl maps to True here for success) instead + of raising, and log_like/log_unlike (lists of regexes) are asserted + against the log emitted during the restart attempt. Returns True on a + successful restart, False on failure (only when fail_ok). 
+ """ + offset = self.current_log_position() + try: + self.pg_ctl("restart", "--mode", mode) + except subprocess.CalledProcessError: + if not fail_ok: + raise + self._check_log_patterns("restart", offset, log_like, log_unlike) + return False + with open(os.path.join(self.datadir, "postmaster.pid"), encoding="utf-8") as f: + self.pid = int(f.readline().strip()) + self._check_log_patterns("restart", offset, log_like, log_unlike) + return True + + def _signal_file_mode(self): + """0o640 if the data dir is group-accessible (initdb -g), else 0o600.""" + dir_mode = stat.S_IMODE(self.datadir.stat().st_mode) + return 0o640 if dir_mode & stat.S_IRGRP else 0o600 + + def set_standby_mode(self): + """Place a standby.signal file (cf. Cluster->set_standby_mode).""" + sig = self.datadir / "standby.signal" + sig.touch() + sig.chmod(self._signal_file_mode()) + + def set_recovery_mode(self): + """Place a recovery.signal file (cf. Cluster->set_recovery_mode).""" + sig = self.datadir / "recovery.signal" + sig.touch() + sig.chmod(self._signal_file_mode()) + + def rotate_logfile(self): + """Switch to a fresh server log file, used on the next (re)start. + + Mirrors PostgreSQL::Test::Cluster->rotate_logfile. + """ + self._logfile_generation += 1 + self.log = self.datadir / "postgresql_{}.log".format(self._logfile_generation) + return self.log + + def lsn(self, mode): + """Return a WAL LSN of the given kind, or None if empty (cf. Cluster->lsn). + + mode is one of insert, flush, write, receive, replay. 
+ """ + modes = { + "insert": "pg_current_wal_insert_lsn()", + "flush": "pg_current_wal_flush_lsn()", + "write": "pg_current_wal_lsn()", + "receive": "pg_last_wal_receive_lsn()", + "replay": "pg_last_wal_replay_lsn()", + } + if mode not in modes: + raise ValueError("unknown mode for lsn: {!r}".format(mode)) + result = self.safe_psql("SELECT {}".format(modes[mode])) + return result or None + + def wait_for_replay_catchup(self, standby, node=None): + """Wait until standby has replayed up to this node's flush LSN. + + Mirrors PostgreSQL::Test::Cluster->wait_for_replay_catchup. + """ + source = node if node is not None else self + self.wait_for_catchup(standby, "replay", source.lsn("flush")) + + def wait_for_catchup(self, standby, mode="replay", target_lsn=None): + """ + Wait until a standby has caught up to target_lsn (default: this node's + current write/replay LSN), by polling pg_stat_replication. Mirrors + PostgreSQL::Test::Cluster->wait_for_catchup (the polling fallback path). + + standby may be a PostgresServer or an application_name string. + """ + valid_modes = ("sent", "write", "flush", "replay") + if mode not in valid_modes: + raise ValueError("unknown mode {!r} for wait_for_catchup".format(mode)) + + standby_name = standby.name if isinstance(standby, PostgresServer) else standby + + if target_lsn is None: + if self.safe_psql("SELECT pg_is_in_recovery()") == "t": + target_lsn = self.lsn("replay") + else: + target_lsn = self.lsn("write") + + # Match the connection whose application_name is standby_name. Standbys + # with a tool-generated primary_conninfo (pg_rewind / pg_basebackup + # --write-recovery-conf) connect without setting application_name and so + # report 'walreceiver'; fall back to that, but only when no connection + # with the requested name exists. Otherwise an unrelated 'walreceiver' + # connection (e.g. 
a physical standby alongside a named logical + # subscriber) would also match, the per-row query would return more than + # one row, and poll_query_until's single-"t" comparison never succeeds. + query = ( + "SELECT '{lsn}' <= {mode}_lsn AND state = 'streaming'" + " FROM pg_catalog.pg_stat_replication" + " WHERE application_name = '{name}'" + " OR (application_name = 'walreceiver'" + " AND NOT EXISTS (SELECT 1 FROM pg_catalog.pg_stat_replication" + " WHERE application_name = '{name}'))" + ).format(lsn=target_lsn, mode=mode, name=standby_name) + + if not self.poll_query_until(query): + details = self.safe_psql("SELECT * FROM pg_catalog.pg_stat_replication") + raise AssertionError( + "timed out waiting for catchup\n" + "pg_stat_replication:\n{}".format(details) + ) + + def wait_for_subscription_sync( + self, publisher=None, subname=None, dbname="postgres" + ): + """ + Wait for all of this subscriber's tables to finish initial sync, then + (if publisher/subname given) wait for the publisher to catch up. + Mirrors PostgreSQL::Test::Cluster->wait_for_subscription_sync. 
+ """ + query = ( + "SELECT count(1) = 0 FROM pg_subscription_rel " + "WHERE srsubstate NOT IN ('r', 's');" + ) + if not self.poll_query_until(query, dbname=dbname): + details = self.safe_psql("SELECT * FROM pg_subscription_rel", dbname=dbname) + raise AssertionError( + "timed out waiting for subscriber to synchronize data\n" + "pg_subscription_rel:\n{}".format(details) + ) + + if publisher is not None: + if subname is None: + raise ValueError("subscription name must be specified") + publisher.wait_for_catchup(subname) + def pg_ctl(self, *args): """Run pg_ctl with the given arguments.""" self._run(self._pg_ctl, "--pgdata", self.datadir, "--log", self.log, *args) - def _run(self, cmd, *args, addenv: Optional[dict] = None): - """Run a command with PG* environment variables set.""" + def _connenv(self, **extra): + """Return an environment dict with this server's PG* connection vars.""" subenv = dict(os.environ) subenv.update( { @@ -301,8 +1599,120 @@ def _run(self, cmd, *args, addenv: Optional[dict] = None): "PGDATA": str(self.datadir), } ) - if addenv: - subenv.update(addenv) + subenv.update(extra) + return subenv + + @property + def bin_dir(self): + """This server's bin directory (cf. Cluster install bindir).""" + return self._bindir + + @property + def connenv(self): + """An environment dict with this server's PG* connection vars. + + Public view of the connection environment, for spawning server binaries + (e.g. postgres --single) directly via subprocess. + """ + return self._connenv() + + @property + def bin(self) -> PgBin: + """A PgBin bound to this server's bindir and connection environment. + + Use it for node-scoped command assertions, e.g. node.bin.command_ok(). + """ + return PgBin( + self._bindir, + extra_env={ + "PGHOST": str(self.host), + "PGPORT": str(self.port), + "PGDATABASE": "postgres", + "PGDATA": str(self.datadir), + }, + ) + + def command_ok(self, cmd, msg=None): + """command_ok against this server's connection. 
See PgBin.command_ok.""" + return self.bin.command_ok(cmd, msg) + + def command_fails(self, cmd, msg=None): + """command_fails against this server's connection.""" + return self.bin.command_fails(cmd, msg) + + def command_like(self, cmd, pattern, msg=None): + """command_like against this server's connection.""" + return self.bin.command_like(cmd, pattern, msg) + + def command_fails_like(self, cmd, pattern, msg=None): + """command_fails_like against this server's connection.""" + return self.bin.command_fails_like(cmd, pattern, msg) + + def command_checks_all(self, cmd, exit_code, stdout_res, stderr_res, msg=None): + """command_checks_all against this server's connection.""" + return self.bin.command_checks_all(cmd, exit_code, stdout_res, stderr_res, msg) + + def pgbench( # pylint: disable=keyword-arg-before-vararg + self, opts, exit_code, stdout_res, stderr_res, msg, files=None, *args + ): + """Run pgbench against this server and check its output. + + Mirrors PostgreSQL::Test::Cluster->pgbench: opts is a string of pgbench + options, files maps a script name to SQL run via -f, and args are + appended. A script name may carry a trailing ``@`` which is kept + in the --file argument (pgbench reads it as the script weight) but + stripped from the on-disk filename. Files are written to this node's + basedir in sorted order for determinism, and the command is verified + with command_checks_all (expected exit_code, stdout_res/stderr_res). + """ + cmd = ["pgbench"] + shlex.split(opts) + script_files = files or {} + for name in sorted(script_files): + file_arg = self.basedir / name + cmd += ["-f", str(file_arg)] + # Strip a trailing @ to get the real on-disk filename. 
+ on_disk = re.sub(r"@\d+$", "", str(file_arg)) + assert not os.path.exists(on_disk), "{} must not already exist".format( + on_disk + ) + with open(on_disk, "w", encoding="utf-8") as fh: + fh.write(script_files[name]) + cmd += list(args) + return self.bin.command_checks_all(cmd, exit_code, stdout_res, stderr_res, msg) + + def issues_sql_like(self, cmd, pattern, msg=None): + """ + Run cmd against this server (expecting exit 0), then assert the server + log gained a line matching pattern. Mirrors + PostgreSQL::Test::Cluster->issues_sql_like (relies on log_statement=all). + """ + offset = self.current_log_position() + self.command_ok(cmd, msg) + log = slurp_file(self.log, offset) + assert re.search( + pattern, log + ), "{}: pattern {!r} not found in server log\nlog:\n{}".format( + msg, pattern, log + ) + + def issues_sql_unlike(self, cmd, pattern, msg=None): + """ + Run cmd against this server (expecting exit 0), then assert the server + log did NOT gain a line matching pattern. Mirrors + PostgreSQL::Test::Cluster->issues_sql_unlike. + """ + offset = self.current_log_position() + self.command_ok(cmd, msg) + log = slurp_file(self.log, offset) + assert not re.search( + pattern, log + ), "{}: pattern {!r} unexpectedly found in server log\nlog:\n{}".format( + msg, pattern, log + ) + + def _run(self, cmd, *args, addenv: Optional[dict] = None): + """Run a command with PG* environment variables set.""" + subenv = self._connenv(**(addenv or {})) run(cmd, *args, env=subenv) def create_users(self, *userkeys: str): @@ -388,7 +1798,7 @@ def subcontext(self): old_stack = self._cleanup_stack self._cleanup_stack = contextlib.ExitStack() try: - self._cleanup_stack.__enter__() + self._cleanup_stack.__enter__() # pylint: disable=unnecessary-dunder-call yield self finally: self._cleanup_stack.__exit__(None, None, None) @@ -406,14 +1816,81 @@ def stop(self, mode="fast"): # Server may have already been stopped pass + def signal_backend(self, pid, signame): + """Send signal signame (e.g. 
"QUIT", "KILL", "TERM") to process pid. + + Uses ``pg_ctl kill``, which delivers the signal through the server's own + mechanism and so works on every platform (Windows has no Unix signals). + Not for SIGSTOP/SIGCONT, which pg_ctl kill cannot send; those remain + Unix-only via os.kill in the few tests that need them. + + Co-authored-by: Andrew Dunstan + """ + self._run(os.path.join(self._bindir, "pg_ctl"), "kill", signame, str(pid)) + + def kill9(self): + """Hard-kill the postmaster (cf. PostgreSQL::Test::Cluster->kill9). + + Reads the postmaster PID from postmaster.pid and signals it via + ``pg_ctl kill KILL`` (portable to Windows); a no-op if the file is + absent (server already gone). + + Co-authored-by: Andrew Dunstan + """ + pidfile = os.path.join(self.datadir, "postmaster.pid") + try: + with open(pidfile, encoding="utf-8") as fh: + pid = int(fh.readline().strip()) + except (OSError, ValueError): + return + try: + self.signal_backend(pid, "KILL") + except subprocess.CalledProcessError: + pass + def log_content(self) -> str: """Return log content from the current context's start position.""" if not self.log.exists(): return "" - with open(self.log) as f: + with open(self.log, encoding="utf-8", errors="replace") as f: f.seek(self._log_start_pos) return f.read() + def log_matches(self, pattern, offset=0) -> bool: + """Return True if the server log matches pattern from offset onward. + + Boolean counterpart to PostgreSQL::Test::Cluster->log_contains (the + context-manager log_contains() on this class checks during a block). + offset is a character position from current_log_position(). + """ + return re.search(pattern, self._log_text()[offset:]) is not None + + def wait_for_log(self, pattern, offset=0): + """ + Poll the server log until pattern matches from offset onward, returning + the new end offset. Mirrors PostgreSQL::Test::Cluster->wait_for_log. + offset is a character position (see current_log_position). 
+ """ + max_attempts = 10 * test_timeout_default() + for _ in range(max_attempts): + text = self._log_text() + if re.search(pattern, text[offset:]): + return len(text) + time.sleep(0.1) + raise AssertionError("timed out waiting for log to match: {!r}".format(pattern)) + + def log_check(self, test_name, offset, log_like=None, log_unlike=None): + """Assert the server log (from offset onward) matches the given patterns. + + Mirrors PostgreSQL::Test::Cluster->log_check: log_like is a list of + regexes that must all match the log text emitted since offset, and + log_unlike a list of regexes that must none of them match. Because the + backend flushes its log asynchronously, this polls (up to the test + timeout) for the log_like patterns before asserting, so callers should + first wait_for_log() on the event that guarantees the lines are present. + """ + self._check_log_patterns(test_name, offset, log_like, log_unlike) + @contextlib.contextmanager def log_contains(self, pattern, times=None): """ @@ -426,16 +1903,16 @@ def log_contains(self, pattern, times=None): """ start_pos = self.current_log_position() yield - with open(self.log) as f: + with open(self.log, encoding="utf-8", errors="replace") as f: f.seek(start_pos) content = f.read() if times is None: assert re.search(pattern, content), f"Pattern {pattern!r} not found in log" else: match_count = len(re.findall(pattern, content)) - assert match_count == times, ( - f"Expected {times} matches of {pattern!r}, found {match_count}" - ) + assert ( + match_count == times + ), f"Expected {times} matches of {pattern!r}, found {match_count}" def cleanup(self): """Run all registered cleanup callbacks.""" diff --git a/src/test/pytest/pypg/sysv_shm.py b/src/test/pytest/pypg/sysv_shm.py new file mode 100644 index 0000000000000..feb7162fe8f80 --- /dev/null +++ b/src/test/pytest/pypg/sysv_shm.py @@ -0,0 +1,52 @@ +# Copyright (c) 2025-2026, PostgreSQL Global Development Group + +"""Minimal System V shared-memory helper via ctypes. 
+ +Python twin of the IPC::SharedMem usage in src/test/recovery/t/017_shm.pl: create +a shared-memory segment with an explicit key (the test uses the data directory's +inode, the same key PostgreSQL derives) so the server detects a conflicting +pre-existing segment, and remove it again. +""" + +import ctypes +import ctypes.util + +_IPC_CREAT = 0o1000 +_IPC_EXCL = 0o2000 +_IPC_RMID = 0 +_S_IRUSR = 0o400 +_S_IWUSR = 0o200 + +_libc = ctypes.CDLL(ctypes.util.find_library("c") or "libc.so.6", use_errno=True) +_libc.shmget.argtypes = [ctypes.c_int, ctypes.c_size_t, ctypes.c_int] +_libc.shmget.restype = ctypes.c_int +_libc.shmctl.argtypes = [ctypes.c_int, ctypes.c_int, ctypes.c_void_p] +_libc.shmctl.restype = ctypes.c_int + + +class SysVSharedMemory: + """A System V shared-memory segment created with an explicit key. + + Mirrors IPC::SharedMem->new(key, size, IPC_CREAT|IPC_EXCL|S_IRUSR|S_IWUSR). + create() returns an instance on success or None if the segment could not be + created (e.g. it already exists); remove() deletes it. + """ + + def __init__(self, shmid): + self.shmid = shmid + + @classmethod + def create(cls, key, size=1024): + """Create a new segment for key, or return None if creation failed.""" + ctypes.set_errno(0) + shmid = _libc.shmget(key, size, _IPC_CREAT | _IPC_EXCL | _S_IRUSR | _S_IWUSR) + if shmid < 0: + return None + return cls(shmid) + + def remove(self): + """Remove the segment (IPC_RMID). 
Idempotent.""" + if self.shmid is None: + return + _libc.shmctl(self.shmid, _IPC_RMID, None) + self.shmid = None diff --git a/src/test/pytest/pypg/util.py b/src/test/pytest/pypg/util.py index d46a73d001969..f1a3d5345f7e1 100644 --- a/src/test/pytest/pypg/util.py +++ b/src/test/pytest/pypg/util.py @@ -1,8 +1,61 @@ # Copyright (c) 2025, PostgreSQL Global Development Group +import os +import re import shlex +import socket +import stat import subprocess import sys +import tempfile + + +def run_captured(argv, *, env=None, combine_stderr=False, timeout=None): + """Run argv, capturing output through temporary files instead of pipes. + + Returns ``(returncode, stdout, stderr)`` as text. With combine_stderr, + stderr is folded into stdout and the returned stderr is "". + + Output is captured to temporary files rather than subprocess.PIPE because of + how starting a server behaves: ``pg_ctl start`` launches a postmaster that + inherits and holds open the write end of the parent's stdout/stderr pipe for + its whole lifetime. Reading such a pipe to EOF -- as subprocess does to + collect output -- then blocks until the postmaster exits, i.e. forever + (notably on Windows, and under constrained CI process models). A regular + file handle has no EOF dependency on the writer staying alive, so the parent + reads the captured output as soon as the launched program returns. 
+ + Co-authored-by: Andrew Dunstan + """ + out = tempfile.TemporaryFile() # pylint: disable=consider-using-with + err = subprocess.STDOUT if combine_stderr else tempfile.TemporaryFile() + try: + proc = subprocess.run( + argv, env=env, stdout=out, stderr=err, timeout=timeout, check=False + ) + out.seek(0) + stdout = _decode(out.read()) + if combine_stderr: + stderr = "" + else: + err.seek(0) + stderr = _decode(err.read()) + finally: + out.close() + if err is not subprocess.STDOUT: + err.close() + return proc.returncode, stdout, stderr + + +def _decode(data): + """Decode captured output as text, folding CRLF/CR to LF. + + Programs may emit non-UTF-8 bytes (e.g. LATIN1 object names) that we only + regex-match, so decode leniently. Reading a file gives no universal-newline + handling, so normalize line endings to match text-mode capture. + """ + text = data.decode("utf-8", "replace") + return text.replace("\r\n", "\n").replace("\r", "\n") def eprint(*args, **kwargs): @@ -13,7 +66,7 @@ def eprint(*args, **kwargs): def run(*command, check=True, shell=None, silent=False, **kwargs): """run runs the given command and prints it to stderr""" - __tracebackhide__ = True # Don't show in pytest stack traces + __tracebackhide__ = True # pylint: disable=unused-variable if shell is None: shell = len(command) == 1 and isinstance(command[0], str) @@ -48,8 +101,174 @@ def run(*command, check=True, shell=None, silent=False, **kwargs): def capture(command, *args, stdout=subprocess.PIPE, encoding="utf-8", **kwargs): - __tracebackhide__ = True # Don't pollute pytest stack traces + __tracebackhide__ = True # pylint: disable=unused-variable return run( command, *args, stdout=stdout, encoding=encoding, **kwargs ).stdout.removesuffix("\n") + + +def slurp_file(path, offset=0): + """Read and return a file's contents, optionally starting at a byte offset. + + Mirrors PostgreSQL::Test::Utils::slurp_file. 
+ """ + with open(path, encoding="utf-8", errors="replace") as f: + if offset: + f.seek(offset) + return f.read() + + +def append_to_file(path, data): + """Append data to a file, creating it if necessary.""" + with open(path, "a", encoding="utf-8") as f: + f.write(data) + + +def slurp_dir(path): + """Return the entries of a directory (cf. PostgreSQL::Test::Utils::slurp_dir).""" + return os.listdir(path) + + +def check_pg_config(regexp): + """Return True if a line of the installed pg_config.h matches regexp at start. + + Mirrors PostgreSQL::Test::Utils::check_pg_config: runs `pg_config + --includedir` (pg_config resolved from PG_CONFIG or PATH) and greps + pg_config.h for ``^regexp``. + """ + pg_config = os.environ.get("PG_CONFIG", "pg_config") + proc = subprocess.run( + [pg_config, "--includedir"], + stdout=subprocess.PIPE, + encoding="utf-8", + check=True, + ) + includedir = proc.stdout.strip() + with open(os.path.join(includedir, "pg_config.h"), encoding="utf-8") as fh: + return any(re.match(regexp, line) for line in fh) + + +def scan_server_header(header_path, regexp): + """Return the regexp capture groups from the first matching server-header line. + + Mirrors PostgreSQL::Test::Utils::scan_server_header: runs `pg_config + --includedir-server` and greps header_path (relative) for ``^regexp``, + returning the captured groups of the first match. Raises if no line matches. + """ + pg_config = os.environ.get("PG_CONFIG", "pg_config") + proc = subprocess.run( + [pg_config, "--includedir-server"], + stdout=subprocess.PIPE, + encoding="utf-8", + check=True, + ) + includedir = proc.stdout.strip() + with open(os.path.join(includedir, header_path), encoding="utf-8") as fh: + for line in fh: + match = re.match(regexp, line) + if match: + return match.groups() + raise RuntimeError("could not find match in header {}".format(header_path)) + + +def get_free_port(): + """Reserve and return a likely-free localhost TCP port. 
+ + Like PostgreSQL::Test::Cluster::get_free_port, this is best-effort: the + port is released before it is returned, so a caller must bind it promptly. + """ + with socket.socket() as s: + s.bind(("127.0.0.1", 0)) + return s.getsockname()[1] + + +def check_mode_recursive(directory, dir_mode, file_mode): + """Check permissions recursively under directory. + + Returns True if every directory has mode dir_mode and every regular file + has mode file_mode (comparing the low 12 permission bits). Mirrors + PostgreSQL::Test::Utils::check_mode_recursive. Files that vanish mid-walk + (a running server may delete them) are tolerated. + """ + result = True + + for dirpath, _dirs, files in os.walk(directory): + try: + if stat.S_IMODE(os.stat(dirpath).st_mode) != dir_mode: + eprint("{} mode must be {:04o}".format(dirpath, dir_mode)) + result = False + except FileNotFoundError: + pass + + for fname in files: + fpath = os.path.join(dirpath, fname) + try: + st = os.stat(fpath) + except FileNotFoundError: + continue + if stat.S_ISREG(st.st_mode) and stat.S_IMODE(st.st_mode) != file_mode: + eprint("{} mode must be {:04o}".format(fpath, file_mode)) + result = False + + return result + + +def chmod_recursive(directory, dir_mode, file_mode): + """chmod every directory (dir_mode) and regular file (file_mode) under + directory. Mirrors PostgreSQL::Test::Utils::chmod_recursive. + """ + os.chmod(directory, dir_mode) + for dirpath, dirs, files in os.walk(directory): + for name in dirs: + path = os.path.join(dirpath, name) + if not os.path.islink(path): + os.chmod(path, dir_mode) + for name in files: + path = os.path.join(dirpath, name) + if not os.path.islink(path): + try: + os.chmod(path, file_mode) + except FileNotFoundError: + pass + + +def compare_files(file_a, file_b, msg, line_filter=None): + """Assert two files are line-by-line equal, with an optional line filter. 
+ + Mirrors PostgreSQL::Test::Utils::compare_files: if line_filter is given it + is applied to each (line_a, line_b) pair before comparison (e.g. to mask + environment-specific text); pairs the filter accepts as equal are skipped. + """ + with open(file_a, encoding="utf-8") as fa, open(file_b, encoding="utf-8") as fb: + lines_a = fa.readlines() + lines_b = fb.readlines() + assert len(lines_a) == len(lines_b), "{}: differing line counts".format(msg) + for line_a, line_b in zip(lines_a, lines_b): + if line_filter is not None: + line_a, line_b = line_filter(line_a, line_b) + assert line_a == line_b, "{}: line differs: {!r} != {!r}".format( + msg, line_a, line_b + ) + + +def wait_for_file(filename, regexp, offset=0): + """Wait until filename exists and its contents (from offset) match regexp. + + Mirrors PostgreSQL::Test::Utils::wait_for_file: polls up to the default + timeout, returning the new end offset (offset + matched length) on success + and raising on timeout. + """ + import time as _time # pylint: disable=import-outside-toplevel + from ._env import test_timeout_default # pylint: disable=import-outside-toplevel + + max_attempts = 10 * test_timeout_default() + for _ in range(max_attempts): + if os.path.exists(filename): + contents = slurp_file(filename, offset) + if re.search(regexp, contents): + return offset + len(contents) + _time.sleep(0.1) + raise TimeoutError( + "timed out waiting for file {} contents to match: {}".format(filename, regexp) + ) diff --git a/src/test/pytest/pyt/test_fake.py b/src/test/pytest/pyt/test_fake.py new file mode 100644 index 0000000000000..addff773e4dfb --- /dev/null +++ b/src/test/pytest/pyt/test_fake.py @@ -0,0 +1,51 @@ +# Copyright (c) 2026, PostgreSQL Global Development Group + +"""Tests for pypg.fake: rand_str (the randStr equivalent) and optional Faker.""" + +import string +import warnings + +import pytest + +import pypg + + +def test_rand_str_length_and_charset(): + """rand_str returns the requested length using only 
[A-Za-z0-9].""" + allowed = set(string.ascii_letters + string.digits) + for length in (0, 1, 10, 10000): + s = pypg.rand_str(length) + assert len(s) == length + assert set(s) <= allowed + + +def test_rand_str_custom_charset_and_randomness(): + """A custom charset is respected, and successive calls differ (very likely).""" + assert set(pypg.rand_str(50, charset="ab")) <= {"a", "b"} + # Two independent 64-char draws over 62 symbols collide with negligible odds. + assert pypg.rand_str(64) != pypg.rand_str(64) + + +def test_rand_str_rejects_bad_args(): + """Negative length and empty charset are rejected.""" + with pytest.raises(ValueError): + pypg.rand_str(-1) + with pytest.raises(ValueError): + pypg.rand_str(5, charset="") + + +def test_faker_optional(): + """faker() returns a usable instance, or None with a single warning.""" + pypg.fake._warned.discard("faker") # pylint: disable=protected-access + with warnings.catch_warnings(record=True) as caught: + warnings.simplefilter("always") + fake = pypg.faker() + if fake is None: + # Faker not installed: exactly one RuntimeWarning, and the convenience + # helper still returns a usable string via the rand_str fallback. + assert any(issubclass(w.category, RuntimeWarning) for w in caught) + assert len(pypg.meaningful_text(40)) > 0 + else: + # Faker installed: it produces realistic, non-empty fake data. + assert isinstance(fake.name(), str) and fake.name() + assert len(pypg.meaningful_text(80)) > 0 diff --git a/src/test/pytest/pyt/test_libpq.py b/src/test/pytest/pyt/test_libpq.py index 49726f579e231..b1d8776e0a5fc 100644 --- a/src/test/pytest/pyt/test_libpq.py +++ b/src/test/pytest/pyt/test_libpq.py @@ -118,7 +118,7 @@ def _bg(): sock.settimeout(remaining_timeout()) fn(sock) - except Exception as e: + except Exception as e: # pylint: disable=broad-exception-caught # Save the exception for re-raising on the main thread. 
self._thread_exc = e @@ -139,6 +139,7 @@ def _join(self): # racing against the test's own use of remaining_timeout(). (It's # preferable to let tests report timeouts; the stack traces will # help with debugging.) + assert self._thread is not None # set by background() self._thread.join(remaining_timeout() + 1) if self._thread.is_alive(): raise TimeoutError("background thread is still running after timeout") From 129dc6e053243b6b55bf2ff5956beafdcc48326e Mon Sep 17 00:00:00 2001 From: Greg Burd Date: Sun, 14 Jun 2026 11:16:31 -0400 Subject: [PATCH 10/36] pytest: port src/bin client-program TAP suites Port the TAP suites for the client and server programs under src/bin (pg_ctl, scripts, initdb, pg_amcheck, pg_config, pg_controldata, pg_resetwal, pg_checksums, pg_archivecleanup, pg_waldump, pg_walsummary, pgbench, pg_test_fsync, pg_test_timing, psql) and src/tools/pg_bsd_indent to pytest, running beside the Perl originals. Co-authored-by: Greg Burd --- src/bin/initdb/Makefile | 1 + src/bin/initdb/meson.build | 6 + src/bin/initdb/pyt/test_001_initdb.py | 353 ++++ src/bin/pg_amcheck/meson.build | 9 + src/bin/pg_amcheck/pyt/test_001_basic.py | 15 + src/bin/pg_amcheck/pyt/test_002_nonesuch.py | 376 ++++ src/bin/pg_amcheck/pyt/test_003_check.py | 550 ++++++ .../pg_amcheck/pyt/test_004_verify_heapam.py | 545 ++++++ .../pg_amcheck/pyt/test_005_opclass_damage.py | 50 + src/bin/pg_archivecleanup/Makefile | 1 + src/bin/pg_archivecleanup/meson.build | 5 + .../pyt/test_010_pg_archivecleanup.py | 148 ++ src/bin/pg_checksums/Makefile | 1 + src/bin/pg_checksums/meson.build | 6 + src/bin/pg_checksums/pyt/test_001_basic.py | 10 + src/bin/pg_checksums/pyt/test_002_actions.py | 248 +++ src/bin/pg_config/Makefile | 1 + src/bin/pg_config/meson.build | 5 + src/bin/pg_config/pyt/test_001_pg_config.py | 28 + src/bin/pg_controldata/Makefile | 1 + src/bin/pg_controldata/meson.build | 5 + .../pyt/test_001_pg_controldata.py | 44 + src/bin/pg_ctl/Makefile | 1 + src/bin/pg_ctl/meson.build | 8 +
src/bin/pg_ctl/pyt/test_001_start_stop.py | 105 ++ src/bin/pg_ctl/pyt/test_002_status.py | 34 + src/bin/pg_ctl/pyt/test_003_promote.py | 69 + src/bin/pg_ctl/pyt/test_004_logrotate.py | 121 ++ src/bin/pg_resetwal/Makefile | 1 + src/bin/pg_resetwal/meson.build | 6 + src/bin/pg_resetwal/pyt/test_001_basic.py | 189 ++ src/bin/pg_resetwal/pyt/test_002_corrupted.py | 57 + src/bin/pg_test_fsync/Makefile | 1 + src/bin/pg_test_fsync/meson.build | 5 + src/bin/pg_test_fsync/pyt/test_001_basic.py | 21 + src/bin/pg_test_timing/Makefile | 1 + src/bin/pg_test_timing/meson.build | 5 + src/bin/pg_test_timing/pyt/test_001_basic.py | 40 + src/bin/pg_waldump/meson.build | 7 + src/bin/pg_waldump/pyt/test_001_basic.py | 473 +++++ .../pg_waldump/pyt/test_002_save_fullpage.py | 84 + src/bin/pg_walsummary/Makefile | 1 + src/bin/pg_walsummary/meson.build | 8 +- src/bin/pg_walsummary/pyt/test_001_basic.py | 16 + src/bin/pg_walsummary/pyt/test_002_blocks.py | 85 + src/bin/pgbench/Makefile | 1 + src/bin/pgbench/meson.build | 6 + .../pyt/test_001_pgbench_with_server.py | 1546 +++++++++++++++++ .../pgbench/pyt/test_002_pgbench_no_server.py | 271 +++ src/bin/psql/Makefile | 1 + src/bin/psql/meson.build | 9 + src/bin/psql/pyt/test_001_basic.py | 363 ++++ src/bin/psql/pyt/test_010_tab_completion.py | 533 ++++++ src/bin/psql/pyt/test_020_cancel.py | 43 + src/bin/psql/pyt/test_030_pager.py | 74 + src/bin/scripts/Makefile | 1 + src/bin/scripts/meson.build | 18 + src/bin/scripts/pyt/test_010_clusterdb.py | 35 + src/bin/scripts/pyt/test_011_clusterdb_all.py | 48 + src/bin/scripts/pyt/test_020_createdb.py | 413 +++++ src/bin/scripts/pyt/test_040_createuser.py | 124 ++ src/bin/scripts/pyt/test_050_dropdb.py | 40 + src/bin/scripts/pyt/test_070_dropuser.py | 25 + src/bin/scripts/pyt/test_080_pg_isready.py | 24 + src/bin/scripts/pyt/test_090_reindexdb.py | 302 ++++ src/bin/scripts/pyt/test_091_reindexdb_all.py | 56 + src/bin/scripts/pyt/test_100_vacuumdb.py | 460 +++++ 
src/bin/scripts/pyt/test_101_vacuumdb_all.py | 29 + .../scripts/pyt/test_102_vacuumdb_stages.py | 46 + src/bin/scripts/pyt/test_200_connstr.py | 46 + src/tools/pg_bsd_indent/meson.build | 5 + .../pyt/test_001_pg_bsd_indent.py | 59 + 72 files changed, 8323 insertions(+), 1 deletion(-) create mode 100644 src/bin/initdb/pyt/test_001_initdb.py create mode 100644 src/bin/pg_amcheck/pyt/test_001_basic.py create mode 100644 src/bin/pg_amcheck/pyt/test_002_nonesuch.py create mode 100644 src/bin/pg_amcheck/pyt/test_003_check.py create mode 100644 src/bin/pg_amcheck/pyt/test_004_verify_heapam.py create mode 100644 src/bin/pg_amcheck/pyt/test_005_opclass_damage.py create mode 100644 src/bin/pg_archivecleanup/pyt/test_010_pg_archivecleanup.py create mode 100644 src/bin/pg_checksums/pyt/test_001_basic.py create mode 100644 src/bin/pg_checksums/pyt/test_002_actions.py create mode 100644 src/bin/pg_config/pyt/test_001_pg_config.py create mode 100644 src/bin/pg_controldata/pyt/test_001_pg_controldata.py create mode 100644 src/bin/pg_ctl/pyt/test_001_start_stop.py create mode 100644 src/bin/pg_ctl/pyt/test_002_status.py create mode 100644 src/bin/pg_ctl/pyt/test_003_promote.py create mode 100644 src/bin/pg_ctl/pyt/test_004_logrotate.py create mode 100644 src/bin/pg_resetwal/pyt/test_001_basic.py create mode 100644 src/bin/pg_resetwal/pyt/test_002_corrupted.py create mode 100644 src/bin/pg_test_fsync/pyt/test_001_basic.py create mode 100644 src/bin/pg_test_timing/pyt/test_001_basic.py create mode 100644 src/bin/pg_waldump/pyt/test_001_basic.py create mode 100644 src/bin/pg_waldump/pyt/test_002_save_fullpage.py create mode 100644 src/bin/pg_walsummary/pyt/test_001_basic.py create mode 100644 src/bin/pg_walsummary/pyt/test_002_blocks.py create mode 100644 src/bin/pgbench/pyt/test_001_pgbench_with_server.py create mode 100644 src/bin/pgbench/pyt/test_002_pgbench_no_server.py create mode 100644 src/bin/psql/pyt/test_001_basic.py create mode 100644 
src/bin/psql/pyt/test_010_tab_completion.py create mode 100644 src/bin/psql/pyt/test_020_cancel.py create mode 100644 src/bin/psql/pyt/test_030_pager.py create mode 100644 src/bin/scripts/pyt/test_010_clusterdb.py create mode 100644 src/bin/scripts/pyt/test_011_clusterdb_all.py create mode 100644 src/bin/scripts/pyt/test_020_createdb.py create mode 100644 src/bin/scripts/pyt/test_040_createuser.py create mode 100644 src/bin/scripts/pyt/test_050_dropdb.py create mode 100644 src/bin/scripts/pyt/test_070_dropuser.py create mode 100644 src/bin/scripts/pyt/test_080_pg_isready.py create mode 100644 src/bin/scripts/pyt/test_090_reindexdb.py create mode 100644 src/bin/scripts/pyt/test_091_reindexdb_all.py create mode 100644 src/bin/scripts/pyt/test_100_vacuumdb.py create mode 100644 src/bin/scripts/pyt/test_101_vacuumdb_all.py create mode 100644 src/bin/scripts/pyt/test_102_vacuumdb_stages.py create mode 100644 src/bin/scripts/pyt/test_200_connstr.py create mode 100644 src/tools/pg_bsd_indent/pyt/test_001_pg_bsd_indent.py diff --git a/src/bin/initdb/Makefile b/src/bin/initdb/Makefile index 21b755025ad61..c483d5d8a4c84 100644 --- a/src/bin/initdb/Makefile +++ b/src/bin/initdb/Makefile @@ -65,6 +65,7 @@ export with_icu check: $(prove_check) + $(pytest_check) installcheck: $(prove_installcheck) diff --git a/src/bin/initdb/meson.build b/src/bin/initdb/meson.build index bc6eb2e085ca4..51c5705787814 100644 --- a/src/bin/initdb/meson.build +++ b/src/bin/initdb/meson.build @@ -36,6 +36,12 @@ tests += { 't/001_initdb.pl', ], }, + 'pytest': { + 'env': {'with_icu': icu.found() ? 
'yes' : 'no'}, + 'tests': [ + 'pyt/test_001_initdb.py', + ], + }, } subdir('po', if_found: libintl) diff --git a/src/bin/initdb/pyt/test_001_initdb.py b/src/bin/initdb/pyt/test_001_initdb.py new file mode 100644 index 0000000000000..b12f4d9863cb8 --- /dev/null +++ b/src/bin/initdb/pyt/test_001_initdb.py @@ -0,0 +1,353 @@ +# Copyright (c) 2021-2026, PostgreSQL Global Development Group + +"""Port of src/bin/initdb/t/001_initdb.pl. + +To test successful data directory creation with an additional feature, first +try to elaborate the "successful creation" test instead of adding a test: +successful initdb consumes much time and I/O. +""" + +import os +import platform +import re + +import pypg + +windows_os = platform.system() == "Windows" + + +def test_initdb(pg_bin, tmp_path, monkeypatch): + """initdb argument handling, successful creation, providers, checksums.""" + xlogdir = tmp_path / "pgxlog" + datadir = tmp_path / "data" + supports_syncfs = pg_bin.check_pg_config("#define HAVE_SYNCFS 1") + + pg_bin.program_help_ok("initdb") + pg_bin.program_version_ok("initdb") + pg_bin.program_options_handling_ok("initdb") + + pg_bin.command_fails( + ["initdb", "--sync-only", tmp_path / "nonexistent"], + "sync missing data directory", + ) + + xlogdir.mkdir() + (xlogdir / "lost+found").mkdir() + pg_bin.command_fails( + ["initdb", "--waldir", xlogdir, datadir], "existing nonempty xlog directory" + ) + (xlogdir / "lost+found").rmdir() + pg_bin.command_fails( + ["initdb", "--waldir", "pgxlog", datadir], "relative xlog directory not allowed" + ) + + pg_bin.command_fails( + ["initdb", "--username", "pg_test", datadir], + 'role names cannot begin with "pg_"', + ) + + datadir.mkdir() + + # Run one successful test without a TZ setting to exercise initdb's time + # zone setting code. 
+ monkeypatch.delenv("TZ", raising=False) + pg_bin.command_ok( + [ + "initdb", + "--no-sync", + "--text-search-config", + "german", + "--set", + "default_text_search_config=german", + "--waldir", + xlogdir, + datadir, + ], + "successful creation", + ) + if not windows_os: + assert pypg.check_mode_recursive( + datadir, 0o700, 0o600 + ), "check PGDATA permissions" + + # Control file should report data checksums enabled by default. + pg_bin.command_like( + ["pg_controldata", datadir], + r"Data page checksum version:.*1", + "checksums are enabled in control file", + ) + + pg_bin.command_ok(["initdb", "--sync-only", datadir], "sync only") + pg_bin.command_ok( + ["initdb", "--sync-only", "--no-sync-data-files", datadir], + "--no-sync-data-files", + ) + pg_bin.command_fails(["initdb", datadir], "existing data directory") + + if supports_syncfs: + pg_bin.command_ok( + ["initdb", "--sync-only", datadir, "--sync-method", "syncfs"], + "sync method syncfs", + ) + else: + pg_bin.command_fails( + ["initdb", "--sync-only", datadir, "--sync-method", "syncfs"], + "sync method syncfs", + ) + + if not windows_os: + datadir_group = tmp_path / "data_group" + pg_bin.command_ok( + ["initdb", "--allow-group-access", datadir_group], + "successful creation with group access", + ) + assert pypg.check_mode_recursive( + datadir_group, 0o750, 0o640 + ), "check PGDATA permissions" + + if os.environ.get("with_icu") == "yes": + _test_icu_provider(pg_bin, tmp_path) + else: + pg_bin.command_fails( + ["initdb", "--no-sync", "--locale-provider", "icu", tmp_path / "data2"], + "locale provider ICU fails since no ICU support", + ) + + _test_builtin_provider(pg_bin, tmp_path) + _test_set_and_checksums(pg_bin, tmp_path) + + +def _test_icu_provider(pg_bin, tmp_path): + pg_bin.command_fails_like( + ["initdb", "--no-sync", "--locale-provider", "icu", tmp_path / "data2"], + r"initdb: error: locale must be specified if provider is icu", + "locale provider ICU requires --icu-locale", + ) + pg_bin.command_ok( + [ 
+ "initdb", + "--no-sync", + "--locale-provider", + "icu", + "--icu-locale", + "en", + tmp_path / "data3", + ], + "option --icu-locale", + ) + pg_bin.command_like( + [ + "initdb", + "--no-sync", + "--auth", + "trust", + "--locale-provider", + "icu", + "--locale", + "und", + "--lc-collate", + "C", + "--lc-ctype", + "C", + "--lc-messages", + "C", + "--lc-numeric", + "C", + "--lc-monetary", + "C", + "--lc-time", + "C", + tmp_path / "data4", + ], + r"(?ms)^\s+default collation:\s+und\n", + "options --locale-provider=icu --locale=und --lc-*=C", + ) + for icu_locale, pattern, name in ( + ( + "@colNumeric=lower", + r"could not open collator for locale", + "fails for invalid ICU locale", + ), + ( + "nonsense-nowhere", + r'error: locale "nonsense-nowhere" has unknown language "nonsense"', + "fails for nonsense language", + ), + ( + "@colNumeric=lower", + r'could not open collator for locale "und-u-kn-lower": ' + r"U_ILLEGAL_ARGUMENT_ERROR", + "fails for invalid collation argument", + ), + ): + pg_bin.command_fails_like( + [ + "initdb", + "--no-sync", + "--locale-provider", + "icu", + "--icu-locale", + icu_locale, + tmp_path / "dataX", + ], + pattern, + name, + ) + pg_bin.command_fails_like( + [ + "initdb", + "--no-sync", + "--locale-provider", + "icu", + "--encoding", + "SQL_ASCII", + "--icu-locale", + "en", + tmp_path / "dataX", + ], + r"error: encoding mismatch", + "fails for encoding not supported by ICU", + ) + + +def _test_builtin_provider(pg_bin, tmp_path): + pg_bin.command_fails( + ["initdb", "--no-sync", "--locale-provider", "builtin", tmp_path / "data6"], + "locale provider builtin fails without --locale", + ) + pg_bin.command_ok( + [ + "initdb", + "--no-sync", + "--locale-provider", + "builtin", + "--locale", + "C", + tmp_path / "data7", + ], + "locale provider builtin with --locale", + ) + pg_bin.command_ok( + [ + "initdb", + "--no-sync", + "--locale-provider", + "builtin", + "--encoding", + "UTF-8", + "--lc-collate", + "C", + "--lc-ctype", + "C", + 
"--builtin-locale", + "C.UTF-8", + tmp_path / "data8", + ], + "locale provider builtin with --encoding=UTF-8 --builtin-locale=C.UTF-8", + ) + pg_bin.command_fails( + [ + "initdb", + "--no-sync", + "--locale-provider", + "builtin", + "--encoding", + "SQL_ASCII", + "--lc-collate", + "C", + "--lc-ctype", + "C", + "--builtin-locale", + "C.UTF-8", + tmp_path / "data9", + ], + "locale provider builtin with --builtin-locale=C.UTF-8 fails for SQL_ASCII", + ) + pg_bin.command_ok( + [ + "initdb", + "--no-sync", + "--locale-provider", + "builtin", + "--lc-ctype", + "C", + "--locale", + "C", + tmp_path / "data10", + ], + "locale provider builtin with --lc-ctype", + ) + for args, name in ( + (["--icu-locale", "en"], "fails for locale provider builtin with ICU locale"), + (["--icu-rules", '""'], "fails for locale provider builtin with ICU rules"), + ): + pg_bin.command_fails( + [ + "initdb", + "--no-sync", + "--locale-provider", + "builtin", + *args, + tmp_path / "dataX", + ], + name, + ) + pg_bin.command_fails( + ["initdb", "--no-sync", "--locale-provider", "xyz", tmp_path / "dataX"], + "fails for invalid locale provider", + ) + pg_bin.command_fails( + [ + "initdb", + "--no-sync", + "--locale-provider", + "libc", + "--icu-locale", + "en", + tmp_path / "dataX", + ], + "fails for invalid option combination", + ) + + +def _test_set_and_checksums(pg_bin, tmp_path): + pg_bin.command_fails( + ["initdb", "--no-sync", "--set", "foo=bar", tmp_path / "dataX"], + "fails for invalid --set option", + ) + + # Multiple --set parameters are added case-insensitively. 
+ pg_bin.command_ok( + [ + "initdb", + "--no-sync", + "--set", + "work_mem=128", + "--set", + "Work_Mem=256", + "--set", + "WORK_MEM=512", + tmp_path / "dataY", + ], + "multiple --set options with different case", + ) + conf = pypg.slurp_file(tmp_path / "dataY" / "postgresql.conf") + assert not re.search(r"(?m)^WORK_MEM = ", conf), "WORK_MEM should not be configured" + assert not re.search(r"(?m)^Work_Mem = ", conf), "Work_Mem should not be configured" + assert re.search(r"(?m)^work_mem = 512", conf), "work_mem should be in config" + + # --no-data-checksums flag. + datadir_nochecksums = tmp_path / "data_no_checksums" + pg_bin.command_ok( + ["initdb", "--no-data-checksums", datadir_nochecksums], + "successful creation without data checksums", + ) + pg_bin.command_like( + ["pg_controldata", datadir_nochecksums], + r"Data page checksum version:.*0", + "checksums are disabled in control file", + ) + pg_bin.command_fails( + ["pg_checksums", "--pgdata", datadir_nochecksums], + "pg_checksums fails with data checksum disabled", + ) diff --git a/src/bin/pg_amcheck/meson.build b/src/bin/pg_amcheck/meson.build index 592cef74ecb9a..cd1dbe7b85ca2 100644 --- a/src/bin/pg_amcheck/meson.build +++ b/src/bin/pg_amcheck/meson.build @@ -21,6 +21,15 @@ tests += { 'name': 'pg_amcheck', 'sd': meson.current_source_dir(), 'bd': meson.current_build_dir(), + 'pytest': { + 'tests': [ + 'pyt/test_001_basic.py', + 'pyt/test_002_nonesuch.py', + 'pyt/test_005_opclass_damage.py', + 'pyt/test_003_check.py', + 'pyt/test_004_verify_heapam.py', + ], + }, 'tap': { 'tests': [ 't/001_basic.pl', diff --git a/src/bin/pg_amcheck/pyt/test_001_basic.py b/src/bin/pg_amcheck/pyt/test_001_basic.py new file mode 100644 index 0000000000000..8d89e51579540 --- /dev/null +++ b/src/bin/pg_amcheck/pyt/test_001_basic.py @@ -0,0 +1,15 @@ +# Copyright (c) 2022-2026, PostgreSQL Global Development Group + +# pylint: disable=line-too-long,too-many-statements +"""Port of src/bin/pg_amcheck/t/001_basic.pl. 
+ +pg_amcheck --help / --version / invalid-option handling. +Generated from the Perl original via .agent/gen_golden.py. +""" + + +def test_001_basic(pg_bin): + """pg_amcheck --help / --version / invalid-option handling.""" + pg_bin.program_help_ok("pg_amcheck") + pg_bin.program_version_ok("pg_amcheck") + pg_bin.program_options_handling_ok("pg_amcheck") diff --git a/src/bin/pg_amcheck/pyt/test_002_nonesuch.py b/src/bin/pg_amcheck/pyt/test_002_nonesuch.py new file mode 100644 index 0000000000000..11fc718b599b5 --- /dev/null +++ b/src/bin/pg_amcheck/pyt/test_002_nonesuch.py @@ -0,0 +1,376 @@ +# Copyright (c) 2022-2026, PostgreSQL Global Development Group + +# pylint: disable=line-too-long,too-many-statements +"""Port of src/bin/pg_amcheck/t/002_nonesuch.pl. + +pg_amcheck error handling for nonexistent targets: missing databases, schemas, tables, indexes, and roles each produce the documented diagnostics and exit codes. +Generated from the Perl original via .agent/gen_golden.py. +""" + + +def test_002_nonesuch(create_pg): + """pg_amcheck diagnostics for nonexistent databases/schemas/tables/roles.""" + node = create_pg("test", auth_extra=["--create-role", "no_such_user"], start=False) + node.start() + node.safe_psql("CREATE EXTENSION amcheck") + node.command_checks_all( + ["pg_amcheck", "qqq"], + 1, + [r"""^$"""], + [r"""FATAL: database "qqq" does not exist"""], + "checking a non-existent database", + ) + node.command_checks_all( + ["pg_amcheck", "--database", "qqq", "--database", "postgres"], + 1, + [r"""^$"""], + [r'''pg_amcheck: error: no connectable databases to check matching "qqq"'''], + "checking an unresolvable database pattern", + ) + node.command_checks_all( + [ + "pg_amcheck", + "--no-strict-names", + "--database", + "qqq", + "--database", + "postgres", + ], + 0, + [r"""^$"""], + [r'''pg_amcheck: warning: no connectable databases to check matching "qqq"'''], + "checking an unresolvable database pattern under --no-strict-names", + ) + node.command_checks_all(
+ ["pg_amcheck", "--database", "post", "--database", "postgres"], + 1, + [r"""^$"""], + [r'''pg_amcheck: error: no connectable databases to check matching "post"'''], + "checking an unresolvable database pattern (substring of existent database)", + ) + node.command_checks_all( + ["pg_amcheck", "--database", "postgresql", "--database", "postgres"], + 1, + [r"""^$"""], + [ + r'''pg_amcheck: error: no connectable databases to check matching "postgresql"''' + ], + "checking an unresolvable database pattern (superstring of existent database)", + ) + node.command_checks_all( + ["pg_amcheck", "--username", "no_such_user", "postgres"], + 1, + [r"""^$"""], + [r"""role "no_such_user" does not exist"""], + "checking with a non-existent user", + ) + node.command_checks_all( + ["pg_amcheck", "template1"], + 1, + [r"""^$"""], + [ + r"""pg_amcheck: warning: skipping database "template1": amcheck is not installed""", + r"""pg_amcheck: error: no relations to check""", + ], + "checking a database by name without amcheck installed, no other databases", + ) + node.command_checks_all( + ["pg_amcheck", "--database", "template1", "--database", "postgres"], + 0, + [r"""^$"""], + [ + r"""pg_amcheck: warning: skipping database "template1": amcheck is not installed""" + ], + "checking a database by name without amcheck installed, with other databases", + ) + node.command_checks_all( + ["pg_amcheck", "--all"], + 0, + [r"""^$"""], + [ + r"""pg_amcheck: warning: skipping database "template1": amcheck is not installed""" + ], + "checking a database by pattern without amcheck installed, with other databases", + ) + node.command_checks_all( + ["pg_amcheck", "--database", "postgres", "--table", ".."], + 1, + [r"""^$"""], + [r'''pg_amcheck: error: no connectable databases to check matching "\.\."'''], + 'checking table pattern ".."', + ) + node.command_checks_all( + ["pg_amcheck", "--database", "postgres", "--table", ".foo.bar"], + 1, + [r"""^$"""], + [ + r'''pg_amcheck: error: no connectable 
databases to check matching "\.foo\.bar"''' + ], + 'checking table pattern ".foo.bar"', + ) + node.command_checks_all( + ["pg_amcheck", "--database", "postgres", "--table", "."], + 1, + [r"""^$"""], + [r'''pg_amcheck: error: no heap tables to check matching "\."'''], + 'checking table pattern "."', + ) + node.command_checks_all( + ["pg_amcheck", "--database", "localhost.postgres"], + 2, + [r"""^$"""], + [ + r"""pg_amcheck: error: improper qualified name \(too many dotted names\): localhost\.postgres""" + ], + "multipart database patterns are rejected", + ) + node.command_checks_all( + ["pg_amcheck", "--schema", "localhost.postgres.pg_catalog"], + 2, + [r"""^$"""], + [ + r"""pg_amcheck: error: improper qualified name \(too many dotted names\): localhost\.postgres\.pg_catalog""" + ], + "three part schema patterns are rejected", + ) + node.command_checks_all( + ["pg_amcheck", "--table", "localhost.postgres.pg_catalog.pg_class"], + 2, + [r"""^$"""], + [ + r"""pg_amcheck: error: improper relation name \(too many dotted names\): localhost\.postgres\.pg_catalog\.pg_class""" + ], + "four part table patterns are rejected", + ) + node.command_checks_all( + [ + "pg_amcheck", + "--no-strict-names", + "--table", + "this.is.a.really.long.dotted.string", + ], + 2, + [r"""^$"""], + [ + r"""pg_amcheck: error: improper relation name \(too many dotted names\): this\.is\.a\.really\.long\.dotted\.string""" + ], + "ungrammatical table names still draw errors under --no-strict-names", + ) + node.command_checks_all( + ["pg_amcheck", "--no-strict-names", "--schema", "postgres.long.dotted.string"], + 2, + [r"""^$"""], + [ + r"""pg_amcheck: error: improper qualified name \(too many dotted names\): postgres\.long\.dotted\.string""" + ], + "ungrammatical schema names still draw errors under --no-strict-names", + ) + node.command_checks_all( + [ + "pg_amcheck", + "--no-strict-names", + "--database", + "postgres.long.dotted.string", + ], + 2, + [r"""^$"""], + [ + r"""pg_amcheck: error: improper 
qualified name \(too many dotted names\): postgres\.long\.dotted\.string""" + ], + "ungrammatical database names still draw errors under --no-strict-names", + ) + node.command_checks_all( + ["pg_amcheck", "--no-strict-names", "--exclude-table", "a.b.c.d"], + 2, + [r"""^$"""], + [ + r"""pg_amcheck: error: improper relation name \(too many dotted names\): a\.b\.c\.d""" + ], + "ungrammatical table exclusions still draw errors under --no-strict-names", + ) + node.command_checks_all( + ["pg_amcheck", "--no-strict-names", "--exclude-schema", "a.b.c"], + 2, + [r"""^$"""], + [ + r"""pg_amcheck: error: improper qualified name \(too many dotted names\): a\.b\.c""" + ], + "ungrammatical schema exclusions still draw errors under --no-strict-names", + ) + node.command_checks_all( + ["pg_amcheck", "--no-strict-names", "--exclude-database", "a.b"], + 2, + [r"""^$"""], + [ + r"""pg_amcheck: error: improper qualified name \(too many dotted names\): a\.b""" + ], + "ungrammatical database exclusions still draw errors under --no-strict-names", + ) + node.command_checks_all( + [ + "pg_amcheck", + "--no-strict-names", + "--table", + "no_such_table", + "--table", + "no*such*table", + "--index", + "no_such_index", + "--index", + "no*such*index", + "--relation", + "no_such_relation", + "--relation", + "no*such*relation", + "--database", + "no_such_database", + "--database", + "no*such*database", + "--relation", + "none.none", + "--relation", + "none.none.none", + "--relation", + "postgres.none.none", + "--relation", + "postgres.pg_catalog.none", + "--relation", + "postgres.none.pg_class", + "--table", + "postgres.pg_catalog.pg_class", + ], + 0, + [r"""^$"""], + [ + r'''pg_amcheck: warning: no heap tables to check matching "no_such_table"''', + r'''pg_amcheck: warning: no heap tables to check matching "no\*such\*table"''', + r'''pg_amcheck: warning: no btree indexes to check matching "no_such_index"''', + r'''pg_amcheck: warning: no btree indexes to check matching "no\*such\*index"''', + 
r'''pg_amcheck: warning: no relations to check matching "no_such_relation"''', + r'''pg_amcheck: warning: no relations to check matching "no\*such\*relation"''', + r'''pg_amcheck: warning: no heap tables to check matching "no\*such\*table"''', + r'''pg_amcheck: warning: no connectable databases to check matching "no_such_database"''', + r'''pg_amcheck: warning: no connectable databases to check matching "no\*such\*database"''', + r'''pg_amcheck: warning: no relations to check matching "none\.none"''', + r'''pg_amcheck: warning: no connectable databases to check matching "none\.none\.none"''', + r'''pg_amcheck: warning: no relations to check matching "postgres\.none\.none"''', + r'''pg_amcheck: warning: no relations to check matching "postgres\.pg_catalog\.none"''', + r'''pg_amcheck: warning: no relations to check matching "postgres\.none\.pg_class"''', + r'''pg_amcheck: warning: no connectable databases to check matching "no_such_database"''', + r'''pg_amcheck: warning: no connectable databases to check matching "no\*such\*database"''', + r'''pg_amcheck: warning: no connectable databases to check matching "none\.none\.none"''', + ], + "many unmatched patterns and one matched pattern under --no-strict-names", + ) + node.safe_psql( + "CREATE DATABASE regression_invalid;\n\tUPDATE pg_database SET datconnlimit = -2 WHERE datname = 'regression_invalid';" + ) + node.command_checks_all( + ["pg_amcheck", "--database", "regression_invalid"], + 1, + [r"""^$"""], + [ + r'''pg_amcheck: error: no connectable databases to check matching "regression_invalid"''' + ], + "checking handling of invalid database", + ) + node.command_checks_all( + [ + "pg_amcheck", + "--database", + "postgres", + "--table", + "regression_invalid.public.foo", + ], + 1, + [r"""^$"""], + [ + r'''pg_amcheck: error: no connectable databases to check matching "regression_invalid.public.foo"''' + ], + "checking handling of object in invalid database", + ) + node.safe_psql( + "CREATE TABLE public.foo (f 
integer);\n\tCREATE INDEX foo_idx ON foo(f);" + ) + node.safe_psql("CREATE DATABASE another_db") + node.command_checks_all( + [ + "pg_amcheck", + "--database", + "postgres", + "--no-strict-names", + "--table", + "template1.public.foo", + "--table", + "another_db.public.foo", + "--table", + "no_such_database.public.foo", + "--index", + "template1.public.foo_idx", + "--index", + "another_db.public.foo_idx", + "--index", + "no_such_database.public.foo_idx", + ], + 1, + [r"""^$"""], + [ + r"""pg_amcheck: warning: skipping database "template1": amcheck is not installed""", + r'''pg_amcheck: warning: no heap tables to check matching "template1\.public\.foo"''', + r'''pg_amcheck: warning: no heap tables to check matching "another_db\.public\.foo"''', + r'''pg_amcheck: warning: no connectable databases to check matching "no_such_database\.public\.foo"''', + r'''pg_amcheck: warning: no btree indexes to check matching "template1\.public\.foo_idx"''', + r'''pg_amcheck: warning: no btree indexes to check matching "another_db\.public\.foo_idx"''', + r'''pg_amcheck: warning: no connectable databases to check matching "no_such_database\.public\.foo_idx"''', + r"""pg_amcheck: error: no relations to check""", + ], + "checking otherwise existent objects in the wrong databases", + ) + node.command_checks_all( + [ + "pg_amcheck", + "--all", + "--no-strict-names", + "--exclude-schema", + "public", + "--exclude-schema", + "pg_catalog", + "--exclude-schema", + "pg_toast", + "--exclude-schema", + "information_schema", + ], + 1, + [r"""^$"""], + [ + r"""pg_amcheck: warning: skipping database "template1": amcheck is not installed""", + r"""pg_amcheck: error: no relations to check""", + ], + "schema exclusion patterns exclude all relations", + ) + node.command_checks_all( + [ + "pg_amcheck", + "--all", + "--no-strict-names", + "--schema", + "public", + "--schema", + "pg_catalog", + "--schema", + "pg_toast", + "--schema", + "information_schema", + "--table", + "pg_catalog.pg_class", + 
"--exclude-schema", + "*", + ], + 1, + [r"""^$"""], + [ + r"""pg_amcheck: warning: skipping database "template1": amcheck is not installed""", + r"""pg_amcheck: error: no relations to check""", + ], + "schema exclusion pattern overrides all inclusion patterns", + ) diff --git a/src/bin/pg_amcheck/pyt/test_003_check.py b/src/bin/pg_amcheck/pyt/test_003_check.py new file mode 100644 index 0000000000000..d6ee7101c46cf --- /dev/null +++ b/src/bin/pg_amcheck/pyt/test_003_check.py @@ -0,0 +1,550 @@ +# Copyright (c) 2021-2026, PostgreSQL Global Development Group + +"""Port of src/bin/pg_amcheck/t/003_check.pl. + +End-to-end pg_amcheck corruption checks across multiple databases and schemas: +amcheck installed in an unexpected schema, decoy catalog-named tables and decoy +amcheck-named functions, planned corruptions applied in a single restart +(removed relation files and clobbered first pages), and the resulting +exit-code/stdout/stderr expectations for many option combinations. +""" + +import os +import struct + + +def _relation_filepath(node, dbname, relname): + """Return the absolute on-disk path of a relation's main fork.""" + rel = node.safe_psql( + "SELECT pg_relation_filepath('{}')".format(relname), dbname=dbname + ) + assert rel, "path not found for relation {}".format(relname) + return os.path.join(node.datadir, rel) + + +def _relation_toast(node, dbname, relname): + """Return the toast relation name for relname, or '' if none.""" + return node.safe_psql( + "SELECT c.reltoastrelid::regclass\n" + " FROM pg_catalog.pg_class c\n" + " WHERE c.oid = '{}'::regclass\n" + " AND c.reltoastrelid != 0".format(relname), + dbname=dbname, + ) + + +def _corrupt_first_page(relpath): + """Clobber the first page's line pointers with corruption-triggering junk.""" + # The values are chosen to hit the various line-pointer-corruption checks + # in verify_heapam.c on both little-endian and big-endian architectures + # (Perl pack("L*", ...) is native unsigned 32-bit). 
+ payload = struct.pack( + "=7I", + 0xAAA15550, + 0xAAA0D550, + 0x00010000, + 0x00008000, + 0x0000800F, + 0x001E8000, + 0xFFFFFFFF, + ) + with open(relpath, "r+b") as fh: + fh.seek(32) + fh.write(payload) + + +_AMCHECK_DECOYS = """\ +CREATE SCHEMA amcheck_schema; +CREATE EXTENSION amcheck WITH SCHEMA amcheck_schema; +CREATE TABLE amcheck_schema.pg_database (junk text); +CREATE TABLE amcheck_schema.pg_namespace (junk text); +CREATE TABLE amcheck_schema.pg_class (junk text); +CREATE TABLE amcheck_schema.pg_operator (junk text); +CREATE TABLE amcheck_schema.pg_proc (junk text); +CREATE TABLE amcheck_schema.pg_tablespace (junk text); + +CREATE FUNCTION public.bt_index_check(index regclass, + heapallindexed boolean default false) +RETURNS VOID AS $$ +BEGIN + RAISE EXCEPTION 'Invoked wrong bt_index_check!'; +END; +$$ LANGUAGE plpgsql; + +CREATE FUNCTION public.bt_index_parent_check(index regclass, + heapallindexed boolean default false, + rootdescend boolean default false) +RETURNS VOID AS $$ +BEGIN + RAISE EXCEPTION 'Invoked wrong bt_index_parent_check!'; +END; +$$ LANGUAGE plpgsql; + +CREATE FUNCTION public.verify_heapam(relation regclass, + on_error_stop boolean default false, + check_toast boolean default false, + skip text default 'none', + startblock bigint default null, + endblock bigint default null, + blkno OUT bigint, + offnum OUT integer, + attnum OUT integer, + msg OUT text) +RETURNS SETOF record AS $$ +BEGIN + RAISE EXCEPTION 'Invoked wrong verify_heapam!'; +END; +$$ LANGUAGE plpgsql; +""" + + +def _schema_objects_sql(schema): + """SQL creating the identical per-schema set of relations and indexes.""" + return """\ +CREATE SCHEMA {s}; +CREATE SEQUENCE {s}.seq1; +CREATE SEQUENCE {s}.seq2; +CREATE TABLE {s}.t1 ( + i INTEGER, + b BOX, + ia int4[], + ir int4range, + t TEXT +); +CREATE TABLE {s}.t2 ( + i INTEGER, + b BOX, + ia int4[], + ir int4range, + t TEXT +); +CREATE VIEW {s}.t2_view AS ( + SELECT i*2, t FROM {s}.t2 +); +ALTER TABLE {s}.t2 + ALTER COLUMN t + 
SET STORAGE EXTERNAL; + +INSERT INTO {s}.t1 (i, b, ia, ir, t) + (SELECT gs::INTEGER AS i, + box(point(gs,gs+5),point(gs*2,gs*3)) AS b, + array[gs, gs + 1]::int4[] AS ia, + int4range(gs, gs+100) AS ir, + repeat('foo', gs) AS t + FROM generate_series(1,10000,3000) AS gs); + +INSERT INTO {s}.t2 (i, b, ia, ir, t) + (SELECT gs::INTEGER AS i, + box(point(gs,gs+5),point(gs*2,gs*3)) AS b, + array[gs, gs + 1]::int4[] AS ia, + int4range(gs, gs+100) AS ir, + repeat('foo', gs) AS t + FROM generate_series(1,10000,3000) AS gs); + +CREATE MATERIALIZED VIEW {s}.t1_mv AS SELECT * FROM {s}.t1; +CREATE MATERIALIZED VIEW {s}.t2_mv AS SELECT * FROM {s}.t2; + +create table {s}.p1 (a int, b int) PARTITION BY list (a); +create table {s}.p2 (a int, b int) PARTITION BY list (a); + +create table {s}.p1_1 partition of {s}.p1 for values in (1, 2, 3); +create table {s}.p1_2 partition of {s}.p1 for values in (4, 5, 6); +create table {s}.p2_1 partition of {s}.p2 for values in (1, 2, 3); +create table {s}.p2_2 partition of {s}.p2 for values in (4, 5, 6); + +CREATE INDEX t1_btree ON {s}.t1 USING BTREE (i); +CREATE INDEX t2_btree ON {s}.t2 USING BTREE (i); + +CREATE INDEX t1_hash ON {s}.t1 USING HASH (i); +CREATE INDEX t2_hash ON {s}.t2 USING HASH (i); + +CREATE INDEX t1_brin ON {s}.t1 USING BRIN (i); +CREATE INDEX t2_brin ON {s}.t2 USING BRIN (i); + +CREATE INDEX t1_gist ON {s}.t1 USING GIST (b); +CREATE INDEX t2_gist ON {s}.t2 USING GIST (b); + +CREATE INDEX t1_gin ON {s}.t1 USING GIN (ia); +CREATE INDEX t2_gin ON {s}.t2 USING GIN (ia); + +CREATE INDEX t1_spgist ON {s}.t1 USING SPGIST (ir); +CREATE INDEX t2_spgist ON {s}.t2 USING SPGIST (ir); + +CREATE UNIQUE INDEX t1_btree_unique ON {s}.t1 USING BTREE (i); +CREATE UNIQUE INDEX t2_btree_unique ON {s}.t2 USING BTREE (i); +""".format( + s=schema + ) + + +class _CorruptionPlan: + """Accumulates relation files to corrupt or remove, applied in one restart.""" + + def __init__(self, node): + self._node = node + self._corrupt_page = set() + 
self._remove_relation = set() + + def corrupt_first_page(self, dbname, relname): + """Plan to clobber the first page of (dbname, relname).""" + self._corrupt_page.add(_relation_filepath(self._node, dbname, relname)) + + def remove_relation_file(self, dbname, relname): + """Plan to remove the relation file of (dbname, relname).""" + self._remove_relation.add(_relation_filepath(self._node, dbname, relname)) + + def remove_toast_file(self, dbname, relname): + """Plan to remove (dbname, relname)'s toast relation file, if any.""" + toastname = _relation_toast(self._node, dbname, relname) + if toastname: + self.remove_relation_file(dbname, toastname) + + def perform_all(self): + """Stop the node, apply every planned corruption, restart the node.""" + self._node.stop() + for relpath in self._corrupt_page: + _corrupt_first_page(relpath) + for relpath in self._remove_relation: + os.unlink(relpath) + self._node.start() + + +def _setup_databases(node): + """Create db1/db2/db3 with decoys and five identical schemas each.""" + for dbname in ("db1", "db2", "db3"): + node.safe_psql("CREATE DATABASE {}".format(dbname)) + node.safe_psql(_AMCHECK_DECOYS, dbname=dbname) + for schema in ("s1", "s2", "s3", "s4", "s5"): + node.safe_psql(_schema_objects_sql(schema), dbname=dbname) + + +def _plan_db1_corruptions(plan): + """Plan all the db1 corruptions across schemas s1..s5.""" + # s1: corrupt indexes. + plan.remove_relation_file("db1", "s1.t1_btree") + plan.corrupt_first_page("db1", "s1.t2_btree") + # s2: corrupt tables. + plan.remove_relation_file("db1", "s2.t1") + plan.corrupt_first_page("db1", "s2.t2") + # s3: corrupt tables, partitions, matviews, and btrees. 
+ plan.remove_relation_file("db1", "s3.t1") + plan.corrupt_first_page("db1", "s3.t2") + plan.remove_relation_file("db1", "s3.t1_mv") + plan.remove_relation_file("db1", "s3.p1_1") + plan.corrupt_first_page("db1", "s3.t2_mv") + plan.corrupt_first_page("db1", "s3.p2_1") + plan.remove_relation_file("db1", "s3.t1_btree") + plan.corrupt_first_page("db1", "s3.t2_btree") + # s4: corrupt only the toast table. + plan.remove_toast_file("db1", "s4.t2") + # s5: corrupt object types amcheck does not support (must not error). + plan.remove_relation_file("db1", "s5.seq1") + plan.remove_relation_file("db1", "s5.t1_hash") + plan.remove_relation_file("db1", "s5.t1_gist") + plan.remove_relation_file("db1", "s5.t1_gin") + plan.remove_relation_file("db1", "s5.t1_brin") + plan.remove_relation_file("db1", "s5.t1_spgist") + plan.corrupt_first_page("db1", "s5.seq2") + plan.corrupt_first_page("db1", "s5.t2_hash") + plan.corrupt_first_page("db1", "s5.t2_gist") + plan.corrupt_first_page("db1", "s5.t2_gin") + plan.corrupt_first_page("db1", "s5.t2_brin") + plan.corrupt_first_page("db1", "s5.t2_spgist") + + +_NO_OUTPUT_RE = r"^$" +_LINE_POINTER_RE = r"line pointer" +_MISSING_FILE_RE = r'could not open file ".*": No such file or directory' +_INDEX_MISSING_FORK_RE = r'index ".*" lacks a main relation fork' + + +def _check_corruption_reports(node, cmd): + """All command_checks_all assertions over the corrupted databases.""" + corrupt_set = [_INDEX_MISSING_FORK_RE, _LINE_POINTER_RE, _MISSING_FILE_RE] + + node.command_checks_all( + cmd + ["db1"], + 2, + corrupt_set, + [_NO_OUTPUT_RE], + "pg_amcheck all schemas, tables and indexes in database db1", + ) + node.command_checks_all( + cmd + ["--database", "db1", "--database", "db2", "--database", "db3"], + 2, + corrupt_set, + [_NO_OUTPUT_RE], + "pg_amcheck all schemas, tables and indexes in databases db1, db2, and db3", + ) + node.command_checks_all( + cmd + ["--all", "--schema", "s1", "--index", "t1_btree"], + 2, + [_INDEX_MISSING_FORK_RE], + [ + 
r'pg_amcheck: warning: skipping database "postgres": ' + r"amcheck is not installed" + ], + "pg_amcheck index s1.t1_btree reports missing main relation fork", + ) + node.command_checks_all( + cmd + ["--database", "db1", "--schema", "s1", "--index", "t2_btree"], + 2, + [r".+"], + [_NO_OUTPUT_RE], + "pg_amcheck index s1.s2 reports index corruption", + ) + node.command_checks_all( + cmd + ["--table", "s1.*", "--no-dependent-indexes", "db1"], + 0, + [_NO_OUTPUT_RE], + [_NO_OUTPUT_RE], + "pg_amcheck of db1.s1 excluding indexes", + ) + node.command_checks_all( + cmd + ["--table", "s1.*", "--no-dependent-indexes", "db2"], + 2, + [_MISSING_FILE_RE], + [_NO_OUTPUT_RE], + "pg_amcheck of db2.s1 excluding indexes", + ) + node.command_checks_all( + cmd + ["--schema", "s3", "db1"], + 2, + corrupt_set, + [_NO_OUTPUT_RE], + "pg_amcheck schema s3 reports table and index errors", + ) + + +def _check_toast_and_exclusions(node, cmd): + """Toast handling and the schema/table/index exclusion assertions.""" + node.command_checks_all( + cmd + ["--schema", "s4", "db1"], + 2, + [_MISSING_FILE_RE], + [_NO_OUTPUT_RE], + "pg_amcheck in schema s4 reports toast corruption", + ) + node.command_checks_all( + cmd + + [ + "--no-dependent-toast", + "--exclude-toast-pointers", + "--schema", + "s4", + "db1", + ], + 0, + [_NO_OUTPUT_RE], + [_NO_OUTPUT_RE], + "pg_amcheck in schema s4 excluding toast reports no corruption", + ) + node.command_checks_all( + cmd + ["--schema", "s5", "db1"], + 0, + [_NO_OUTPUT_RE], + [_NO_OUTPUT_RE], + "pg_amcheck over schema s5 reports no corruption", + ) + node.command_checks_all( + cmd + + [ + "--schema", + "s1", + "--exclude-index", + "t1_btree", + "--exclude-index", + "t2_btree", + "db1", + ], + 0, + [_NO_OUTPUT_RE], + [_NO_OUTPUT_RE], + "pg_amcheck over schema s1 with corrupt indexes excluded reports no " + "corruption", + ) + node.command_checks_all( + cmd + ["--table", "s1.*", "--no-dependent-indexes", "db1"], + 0, + [_NO_OUTPUT_RE], + [_NO_OUTPUT_RE], + "pg_amcheck 
over schema s1 with all indexes excluded reports no corruption", + ) + node.command_checks_all( + cmd + + [ + "--schema", + "s2", + "--exclude-table", + "t1", + "--exclude-table", + "t2", + "db1", + ], + 0, + [_NO_OUTPUT_RE], + [_NO_OUTPUT_RE], + "pg_amcheck over schema s2 with corrupt tables excluded reports no " + "corruption", + ) + + +def _check_block_range_and_modes(node, cmd): + """Bad block-range arguments and the index-mode smoke tests.""" + node.command_fails_like( + cmd + ["--schema", "s5", "--startblock", "junk", "db1"], + r"invalid start block", + "pg_amcheck rejects garbage startblock", + ) + node.command_fails_like( + cmd + ["--schema", "s5", "--endblock", "1234junk", "db1"], + r"invalid end block", + "pg_amcheck rejects garbage endblock", + ) + node.command_fails_like( + cmd + ["--schema", "s5", "--startblock", "5", "--endblock", "4", "db1"], + r"end block precedes start block", + "pg_amcheck rejects invalid block range", + ) + node.command_checks_all( + cmd + ["--schema", "s1", "--index", "t1_btree", "--parent-check", "db1"], + 2, + [_INDEX_MISSING_FORK_RE], + [_NO_OUTPUT_RE], + "pg_amcheck smoke test --parent-check", + ) + node.command_checks_all( + cmd + + [ + "--schema", + "s1", + "--index", + "t1_btree", + "--heapallindexed", + "--rootdescend", + "db1", + ], + 2, + [_INDEX_MISSING_FORK_RE], + [_NO_OUTPUT_RE], + "pg_amcheck smoke test --heapallindexed --rootdescend", + ) + node.command_checks_all( + cmd + + [ + "--database", + "db1", + "--database", + "db2", + "--database", + "db3", + "--exclude-schema", + "s*", + ], + 0, + [_NO_OUTPUT_RE], + [_NO_OUTPUT_RE], + "pg_amcheck excluding all corrupt schemas", + ) + + +def _check_checkunique(node, cmd): + """--checkunique smoke tests, including the unsupported-version warning.""" + node.command_checks_all( + cmd + + [ + "--schema", + "s1", + "--index", + "t1_btree", + "--parent-check", + "--checkunique", + "db1", + ], + 2, + [_INDEX_MISSING_FORK_RE], + [_NO_OUTPUT_RE], + "pg_amcheck smoke test 
--parent-check --checkunique", + ) + node.command_checks_all( + cmd + + [ + "--schema", + "s1", + "--index", + "t1_btree", + "--heapallindexed", + "--rootdescend", + "--checkunique", + "db1", + ], + 2, + [_INDEX_MISSING_FORK_RE], + [_NO_OUTPUT_RE], + "pg_amcheck smoke test --heapallindexed --rootdescend --checkunique", + ) + node.command_checks_all( + cmd + + [ + "--checkunique", + "--database", + "db1", + "--database", + "db2", + "--database", + "db3", + "--exclude-schema", + "s*", + ], + 0, + [_NO_OUTPUT_RE], + [_NO_OUTPUT_RE], + "pg_amcheck excluding all corrupt schemas with --checkunique option", + ) + node.safe_psql( + "DROP EXTENSION amcheck;\n" + "CREATE EXTENSION amcheck WITH SCHEMA amcheck_schema VERSION '1.3' ;", + dbname="db3", + ) + node.command_checks_all( + cmd + ["--checkunique", "db3"], + 0, + [_NO_OUTPUT_RE], + [ + r"pg_amcheck: warning: option --checkunique is not supported by " + r"amcheck version 1.3" + ], + "pg_amcheck smoke test --checkunique", + ) + + +def test_003_check(create_pg): + """pg_amcheck detects planned multi-database, multi-schema corruption.""" + node = create_pg("test", no_data_checksums=True, start=False) + node.append_conf("autovacuum=off") + node.start() + + _setup_databases(node) + + cmd = ["pg_amcheck", "--port", str(node.port)] + + # No corruption yet: nothing reported. + node.command_checks_all( + cmd + ["--database", "db1", "--database", "db2", "--database", "db3"], + 0, + [_NO_OUTPUT_RE], + [_NO_OUTPUT_RE], + "pg_amcheck prior to corruption", + ) + + plan = _CorruptionPlan(node) + _plan_db1_corruptions(plan) + # db2: corrupt s1.t1 and its btree; leave db3 clean. 
+ plan.remove_relation_file("db2", "s1.t1") + plan.remove_relation_file("db2", "s1.t1_btree") + plan.perform_all() + + _check_corruption_reports(node, cmd) + _check_toast_and_exclusions(node, cmd) + _check_block_range_and_modes(node, cmd) + _check_checkunique(node, cmd) diff --git a/src/bin/pg_amcheck/pyt/test_004_verify_heapam.py b/src/bin/pg_amcheck/pyt/test_004_verify_heapam.py new file mode 100644 index 0000000000000..1c17ed87d4da3 --- /dev/null +++ b/src/bin/pg_amcheck/pyt/test_004_verify_heapam.py @@ -0,0 +1,545 @@ +# Copyright (c) 2021-2026, PostgreSQL Global Development Group + +"""Port of src/bin/pg_amcheck/t/004_verify_heapam.pl. + +Demonstrates that pg_amcheck identifies specific kinds of within-page heap +corruption. A table with a precisely-known on-disk layout is built, then +individual tuples are corrupted one way each (xmin/xmax thresholds, t_hoff, +attribute counts, varlena/toast pointers, and HOT/redirect update-chain +breakage) by reading/rewriting the raw HeapTupleHeader with struct. pg_amcheck +must report exactly the expected corruption messages. +""" + +import os +import struct + +import pytest + +# HeapTupleHeader layout for our (BIGINT, TEXT, TEXT) table, mirroring the Perl +# pack code 'LLLSSSSSCCLLCCCCCCCCCCllLL' (native byte order, 58 bytes total). +_PACK_FMT = "=LLLHHHHHBBLLBBBBBBBBBBllLL" +_PACK_LENGTH = 58 +_FIELDS = [ + "t_xmin", + "t_xmax", + "t_field3", + "bi_hi", + "bi_lo", + "ip_posid", + "t_infomask2", + "t_infomask", + "t_hoff", + "t_bits", + "a_1", + "a_2", + "b_header", + "b_body1", + "b_body2", + "b_body3", + "b_body4", + "b_body5", + "b_body6", + "b_body7", + "c_va_header", + "c_va_vartag", + "c_va_rawsize", + "c_va_extinfo", + "c_va_valueid", + "c_va_toastrelid", +] + +# #define constants from access/htup_details.h used while corrupting. 
+_HEAP_HASNULL = 0x0001 +_HEAP_XMIN_COMMITTED = 0x0100 +_HEAP_XMIN_INVALID = 0x0200 +_HEAP_XMAX_COMMITTED = 0x0400 +_HEAP_XMAX_INVALID = 0x0800 +_HEAP_NATTS_MASK = 0x07FF +_HEAP_XMAX_IS_MULTI = 0x1000 +_HEAP_HOT_UPDATED = 0x4000 +_HEAP_ONLY_TUPLE = 0x8000 + +_ROWCOUNT = 44 +_ROWCOUNT_BASIC = 16 + +_U16 = 0xFFFF + + +def _read_tuple(fh, offset): + """Read and unpack one table tuple's header into a field dict.""" + fh.seek(offset) + buffer = fh.read(_PACK_LENGTH) + values = struct.unpack(_PACK_FMT, buffer) + tup = dict(zip(_FIELDS, values)) + tup["b"] = "".join(chr(tup["b_body{}".format(i)]) for i in range(1, 8)) + return tup + + +def _write_tuple(fh, offset, tup): + """Pack and write a (possibly modified) tuple header back to the file.""" + buffer = struct.pack(_PACK_FMT, *(tup[f] for f in _FIELDS)) + fh.seek(offset) + fh.write(buffer) + + +def _header(blkno, offnum=None, attnum=None): + """Expected verify_heapam() message prefix for the given location.""" + base = r'heap table "postgres\.public\.test"' + if attnum is not None: + return r"{}, block {}, offset {}, attribute {}:\s+".format( + base, blkno, offnum, attnum + ) + if offnum is not None: + return r"{}, block {}, offset {}:\s+".format(base, blkno, offnum) + return r"{}, block {}:\s+".format(base, blkno) + + +_SETUP_SQL = """\ +CREATE TABLE public.test (a BIGINT, b TEXT, c TEXT); +ALTER TABLE public.test SET (autovacuum_enabled=false); +ALTER TABLE public.test ALTER COLUMN c SET STORAGE EXTERNAL; +CREATE INDEX test_idx ON public.test(a, b); +""" + +_JUNK_SQL = """\ +CREATE TABLE public.junk AS SELECT 'junk'::TEXT AS junk_column; +ALTER TABLE public.junk SET (autovacuum_enabled=false); +VACUUM FREEZE public.junk +""" + + +def _populate(node): + """Insert all rows / HOT chains needed for the corruption scenarios.""" + node.safe_psql( + "INSERT INTO public.test (a, b, c)\n" + " SELECT\n" + " x'DEADF9F9DEADF9F9'::bigint,\n" + " 'abcdefg',\n" + " repeat('w', 10000)\n" + "FROM generate_series(1, {});\n" + "VACUUM 
FREEZE public.test;".format(_ROWCOUNT_BASIC) + ) + # offnum 17/18, redirects after HOT prune; 19/20 the HOT tuples. + node.safe_psql( + "INSERT INTO public.test (a, b, c)\n" + " VALUES ( x'DEADF9F9DEADF9F9'::bigint, 'abcdefg',\n" + " generate_series(1,2));\n" + "UPDATE public.test SET c = 'a' WHERE c = '1';\n" + "UPDATE public.test SET c = 'a' WHERE c = '2';" + ) + node.safe_psql( + "INSERT INTO public.test (a, b, c)\n" + " VALUES ( x'DEADF9F9DEADF9F9'::bigint, 'abcdefg',\n" + " generate_series(3,6));\n" + "UPDATE public.test SET c = 'a' WHERE c = '3';\n" + "UPDATE public.test SET c = 'a' WHERE c = '4';" + ) + # Aborted HOT update then re-use of the slot. + node.safe_psql( + "BEGIN;\n" + "UPDATE public.test SET c = 'a' WHERE c = '5';\n" + "ABORT;\n" + "VACUUM FREEZE public.test;" + ) + node.safe_psql( + "UPDATE public.test SET c = 'a' WHERE c = '6';\nVACUUM FREEZE public.test;" + ) + # HOT chain data (no freeze). + node.safe_psql( + "INSERT INTO public.test (a, b, c)\n" + " VALUES ( x'DEADF9F9DEADF9F9'::bigint, 'abcdefg',\n" + " generate_series(7,15));\n" + "UPDATE public.test SET c = 'a' WHERE c = '7';\n" + "UPDATE public.test SET c = 'a' WHERE c = '10';\n" + "UPDATE public.test SET c = 'a' WHERE c = '11';\n" + "UPDATE public.test SET c = 'a' WHERE c = '12';\n" + "UPDATE public.test SET c = 'a' WHERE c = '13';\n" + "UPDATE public.test SET c = 'a' WHERE c = '14';\n" + "UPDATE public.test SET c = 'a' WHERE c = '15';" + ) + node.safe_psql("BEGIN;\nUPDATE public.test SET c = 'a' WHERE c = '9';\nABORT;") + node.safe_psql("BEGIN;\nPREPARE TRANSACTION 'in_progress_tx';") + + +def _get_lp_offsets(node): + """Per-line-pointer offsets (-1 for redirects), in offnum order.""" + text = node.safe_psql( + "SELECT CASE WHEN lp_flags = 2 THEN -1 ELSE lp_off END\n" + "FROM heap_page_items(get_raw_page('test', 'main', 0))" + ) + return [int(x) for x in text.split("\n")] + + +def _detect_layout_and_endianness(node, relpath, lp_off): + """Verify the expected page layout; return 
'little'/'big' or skip. + + Mirrors the Perl sanity checks that skip_all if the on-disk layout differs + from expectations on this platform. + """ + endianness = None + with open(relpath, "rb") as fh: + for tupidx in range(_ROWCOUNT): + offset = lp_off[tupidx] + if offset == -1: + continue + tup = _read_tuple(fh, offset) + if ( + tup["a_1"] != 0xDEADF9F9 + or tup["a_2"] != 0xDEADF9F9 + or tup["b"] != "abcdefg" + ): + node.clean_node() + pytest.skip( + "Page layout of index {} differs from our " + "expectations".format(tupidx) + ) + endianness = "little" if tup["b_header"] == 0x11 else "big" + assert endianness is not None + return endianness + + +def _corrupt_basic_tuple(tup, offnum, ctx): + """Apply the offnum-specific basic-validation corruption (offnum 1-16).""" + if offnum <= 8: + _corrupt_basic_tuple_lo(tup, offnum, ctx) + else: + _corrupt_basic_tuple_hi(tup, offnum, ctx) + + +def _uncommit_xmin(tup): + """Clear the XMIN_COMMITTED and XMIN_INVALID infomask bits.""" + tup["t_infomask"] &= ~_HEAP_XMIN_COMMITTED & _U16 + tup["t_infomask"] &= ~_HEAP_XMIN_INVALID & _U16 + + +# verify_heapam() message bodies (single literals, no implicit concatenation). 
+_M_HOFF = ( + r"tuple data should begin at byte 24, but actually begins at byte {} " + r"\(3 attributes, no nulls\)" +) +_M_NATTS = r"number of attributes {} exceeds maximum 3 expected for table" +_M_XMIN_FREEZE = r"xmin {} precedes relation freeze threshold 0:\d+" +_M_XMIN_OLDEST = r"xmin {} precedes oldest valid transaction ID 0:\d+" +_M_XMAX_OLDEST = r"xmax {} precedes oldest valid transaction ID 0:\d+" +_M_XMIN_FUTURE = r"xmin {} equals or exceeds next valid transaction ID 0:\d+" +_M_HOFF_BEYOND = r"data begins at offset 152 beyond the tuple length 58" +_M_HOFF_NULLS = ( + r"tuple data should begin at byte 280, but actually begins at byte 24 " + r"\(2047 attributes, has nulls\)" +) +_M_ATTR_LEN = ( + r"attribute with length \d+ ends at offset \d+ beyond total tuple length \d+" +) +_M_TOAST = r"toast value \d+ not found in toast table" +_M_MXID_EXCEEDS = ( + r"multitransaction ID 4 equals or exceeds next valid multitransaction ID 1" +) +_M_MXID_PRECEDES = ( + r"multitransaction ID 4000000000 precedes relation minimum " + r"multitransaction ID threshold 1" +) + + +def _corrupt_basic_tuple_lo(tup, offnum, ctx): + """Basic-validation corruptions for offnum 1-8 (xmin/xmax/t_hoff).""" + header = _header(0, offnum) + expected = ctx["expected"] + if offnum == 1: + xmin = ctx["relfrozenxid"] - 1 + tup["t_xmin"] = xmin + _uncommit_xmin(tup) + expected.append(header + _M_XMIN_FREEZE.format(xmin)) + elif offnum == 2: + tup["t_xmin"] = 3 + _uncommit_xmin(tup) + expected.append(header + _M_XMIN_OLDEST.format(3)) + elif offnum == 3: + tup["t_xmin"] = 4026531839 + _uncommit_xmin(tup) + expected.append(header + _M_XMIN_OLDEST.format(4026531839)) + elif offnum == 4: + tup["t_xmax"] = 4026531839 + tup["t_infomask"] &= ~_HEAP_XMAX_INVALID & _U16 + expected.append(header + _M_XMAX_OLDEST.format(4026531839)) + elif offnum == 5: + tup["t_hoff"] += 128 + expected.append(header + _M_HOFF_BEYOND) + expected.append(header + _M_HOFF.format(152)) + elif offnum == 6: + tup["t_hoff"] += 3 + 
expected.append(header + _M_HOFF.format(27)) + elif offnum == 7: + tup["t_hoff"] -= 8 + expected.append(header + _M_HOFF.format(16)) + elif offnum == 8: + tup["t_hoff"] -= 3 + expected.append(header + _M_HOFF.format(21)) + + +def _corrupt_basic_tuple_hi(tup, offnum, ctx): + """Basic-validation corruptions for offnum 9-16 (natts/varlena/xmax).""" + header = _header(0, offnum) + expected = ctx["expected"] + if offnum == 9: + tup["t_infomask2"] |= _HEAP_NATTS_MASK + expected.append(header + _M_NATTS.format(2047)) + elif offnum == 10: + tup["t_infomask"] |= _HEAP_HASNULL + tup["t_infomask2"] |= _HEAP_NATTS_MASK + tup["t_bits"] = 0xAA + expected.append(header + _M_HOFF_NULLS) + elif offnum == 11: + tup["t_infomask"] |= _HEAP_HASNULL + tup["t_infomask2"] |= _HEAP_NATTS_MASK & 0x40 + tup["t_bits"] = 0xAA + tup["t_hoff"] = 32 + expected.append(header + _M_NATTS.format(67)) + elif offnum == 12: + tup["b_header"] = 0xFC if ctx["endianness"] == "little" else 0x3F + tup["b_body1"] = 0xFF + tup["b_body2"] = 0xFF + tup["b_body3"] = 0xFF + expected.append(_header(0, offnum, 1) + _M_ATTR_LEN) + elif offnum == 13: + tup["c_va_valueid"] = 0xFFFFFFFF + expected.append(_header(0, offnum, 2) + _M_TOAST) + elif offnum == 14: + tup["t_infomask"] |= _HEAP_XMAX_COMMITTED + tup["t_infomask"] |= _HEAP_XMAX_IS_MULTI + tup["t_xmax"] = 4 + expected.append(header + _M_MXID_EXCEEDS) + elif offnum == 15: + tup["t_infomask"] |= _HEAP_XMAX_COMMITTED + tup["t_infomask"] |= _HEAP_XMAX_IS_MULTI + tup["t_xmax"] = 4000000000 + expected.append(header + _M_MXID_PRECEDES) + elif offnum == 16: + tup["t_xmin"] = 123456 + _uncommit_xmin(tup) + expected.append(header + _M_XMIN_FUTURE.format(123456)) + + +# Chain/redirect message bodies (single literals, no implicit concatenation). 
+_M_REDIR_NONHOT = ( + r"redirected line pointer points to a non-heap-only tuple at offset \d+" +) +_M_REDIR_REDIR = ( + r"redirected line pointer points to another redirected line pointer " + r"at offset \d+" +) +_M_REDIR_DUP = ( + r"redirect line pointer points to offset \d+, but offset \d+ also points there" +) +_M_NONHOT_PRODUCED_HOT = ( + r"non-heap-only update produced a heap-only tuple at offset \d+" +) +_M_NEWVER_DUP = ( + r"tuple points to new version at offset \d+, but offset \d+ also points there" +) +_M_ABORTED_COMMITTED = ( + r"tuple with aborted xmin \d+ was updated to produce a tuple at offset \d+ " + r"with committed xmin \d+" +) +_M_ROOT_HOT = r"tuple is root of chain but is marked as heap-only tuple" +_M_HOT_NO_UPDATE = r"tuple is heap only, but not the result of an update" +_M_HOT_PRODUCED_NONHOT = ( + r"heap-only update produced a non-heap only tuple at offset \d+" +) +_M_HOT_XMAX0 = r"tuple has been HOT updated, but xmax is 0" +_M_INPROGRESS_COMMITTED = ( + r"tuple with in-progress xmin \d+ was updated to produce a tuple at " + r"offset \d+ with committed xmin \d+" +) +_M_ABORTED_INPROGRESS = ( + r"tuple with aborted xmin \d+ was updated to produce a tuple at offset \d+ " + r"with in-progress xmin \d+" +) + + +def _corrupt_chain_tuple(fh, tup, offnum, ctx): + """Apply the offnum-specific HOT/redirect chain corruption (offnum>=17). + + Returns True if tup should be written back, False otherwise (some offnums + rewrite a raw line pointer directly or leave the tuple unchanged). 
+ """ + if offnum in (17, 18, 19, 22): + return _corrupt_chain_redirects(fh, tup, offnum, ctx) + return _corrupt_chain_hot(tup, offnum, ctx) + + +def _corrupt_chain_redirects(fh, tup, offnum, ctx): + """Redirect-related corruptions (offnum 17, 18, 19, 22).""" + header = _header(0, offnum) + expected = ctx["expected"] + little = ctx["endianness"] == "little" + if offnum == 17: + assert tup is None, "offnum 17 should be a redirect" + expected.append(header + _M_REDIR_NONHOT) + return False + if offnum == 18: + assert tup is None, "offnum 18 should be a redirect" + fh.seek(92) + fh.write(struct.pack("=L", 0x00010011 if little else 0x00230000)) + expected.append(header + _M_REDIR_REDIR) + return False + if offnum == 19: + tup["t_infomask2"] &= ~_HEAP_ONLY_TUPLE & _U16 + return True + # offnum == 22 + fh.seek(108) + fh.write(struct.pack("=L", 0x00010019 if little else 0x00330000)) + expected.append(header + _M_REDIR_DUP) + return False + + +def _corrupt_chain_hot(tup, offnum, ctx): # pylint: disable=too-many-return-statements + """HOT update-chain corruptions (offnum 28-43).""" + header = _header(0, offnum) + expected = ctx["expected"] + if offnum == 28: + tup["t_infomask2"] &= ~_HEAP_HOT_UPDATED & _U16 + expected.append(header + _M_NONHOT_PRODUCED_HOT) + ctx["pred_xmax"] = tup["t_xmax"] + ctx["pred_posid"] = tup["ip_posid"] + return True + if offnum == 29: + tup["t_xmax"] = ctx["pred_xmax"] + tup["ip_posid"] = ctx["pred_posid"] + expected.append(header + _M_NEWVER_DUP) + return True + if offnum == 30: + ctx["aborted_xid"] = tup["t_xmax"] + return False + if offnum == 31: + tup["t_xmin"] = ctx["aborted_xid"] + tup["t_infomask"] &= ~_HEAP_XMIN_COMMITTED & _U16 + expected.append(header + _M_ABORTED_COMMITTED) + return True + if offnum == 32: + tup["t_infomask2"] |= _HEAP_ONLY_TUPLE + expected.append(header + _M_ROOT_HOT) + expected.append(header + _M_HOT_NO_UPDATE) + return True + if offnum == 33: + expected.append(header + _M_HOT_PRODUCED_NONHOT) + return False + if 
offnum == 34: + tup["t_xmax"] = 0 + expected.append(header + _M_HOT_XMAX0) + return True + if offnum == 35: + tup["t_xmin"] = ctx["in_progress_xid"] + tup["t_infomask"] &= ~_HEAP_XMIN_COMMITTED & _U16 + expected.append(header + _M_INPROGRESS_COMMITTED) + return True + if offnum == 36: + tup["t_xmin"] = ctx["aborted_xid"] + tup["t_xmax"] = ctx["in_progress_xid"] + tup["t_infomask"] &= ~_HEAP_XMIN_COMMITTED & _U16 + expected.append(header + _M_ABORTED_INPROGRESS) + return True + if offnum == 40: + tup["t_infomask2"] &= ~_HEAP_ONLY_TUPLE & _U16 + return True + if offnum == 43: + tup["t_xmin"] = ctx["in_progress_xid"] + tup["t_infomask"] &= ~_HEAP_XMIN_COMMITTED & _U16 + return True + return False + + +def _corrupt_all_tuples(relpath, lp_off, ctx): + """Walk every line pointer, applying one corruption per relevant offnum.""" + with open(relpath, "r+b") as fh: + for tupidx in range(_ROWCOUNT): + offnum = tupidx + 1 + offset = lp_off[tupidx] + tup = None if offset == -1 else _read_tuple(fh, offset) + if offnum <= _ROWCOUNT_BASIC: + if tup is None: + continue + _corrupt_basic_tuple(tup, offnum, ctx) + _write_tuple(fh, offset, tup) + else: + should_write = _corrupt_chain_tuple(fh, tup, offnum, ctx) + if should_write and tup is not None: + _write_tuple(fh, offset, tup) + + +def test_004_verify_heapam(create_pg): + """pg_amcheck reports each deliberately-injected heap corruption type.""" + os.umask(0o077) + node = create_pg("test", no_data_checksums=True, start=False) + node.append_conf("autovacuum=off") + node.append_conf("max_prepared_transactions=10") + node.start() + port = node.port + node.safe_psql("CREATE EXTENSION amcheck") + node.safe_psql("CREATE EXTENSION pageinspect") + + node.safe_psql("VACUUM FREEZE") + node.safe_psql(_SETUP_SQL) + node.safe_psql(_JUNK_SQL) + + rel = node.safe_psql("SELECT pg_relation_filepath('public.test')") + relpath = os.path.join(node.datadir, rel) + + _populate(node) + + in_progress_xid = node.safe_psql("SELECT transaction FROM 
pg_prepared_xacts;") + relfrozenxid = int( + node.safe_psql("select relfrozenxid from pg_class where relname = 'test'") + ) + datfrozenxid = int( + node.safe_psql( + "select datfrozenxid from pg_database where datname = 'postgres'" + ) + ) + if datfrozenxid <= 3 or datfrozenxid >= relfrozenxid: + node.clean_node() + pytest.skip( + "Xid thresholds not as expected: got datfrozenxid = {}, " + "relfrozenxid = {}".format(datfrozenxid, relfrozenxid) + ) + + lp_off = _get_lp_offsets(node) + assert len(lp_off) == _ROWCOUNT, "row offset counts mismatch" + + node.stop() + endianness = _detect_layout_and_endianness(node, relpath, lp_off) + node.start() + + node.command_ok( + ["pg_amcheck", "--port", str(port), "postgres"], + "pg_amcheck test table, prior to corruption", + ) + node.command_ok( + ["pg_amcheck", "--port", str(port), "postgres"], + "pg_amcheck test table and index, prior to corruption", + ) + + node.stop() + + ctx = { + "relfrozenxid": relfrozenxid, + "in_progress_xid": int(in_progress_xid), + "endianness": endianness, + "expected": [], + "pred_xmax": None, + "pred_posid": None, + "aborted_xid": None, + } + _corrupt_all_tuples(relpath, lp_off, ctx) + node.start() + + node.command_checks_all( + ["pg_amcheck", "--no-dependent-indexes", "--port", str(port), "postgres"], + 2, + ctx["expected"], + [], + "Expected corruption message output", + ) + node.safe_psql("COMMIT PREPARED 'in_progress_tx';") + + node.teardown_node() + node.clean_node() diff --git a/src/bin/pg_amcheck/pyt/test_005_opclass_damage.py b/src/bin/pg_amcheck/pyt/test_005_opclass_damage.py new file mode 100644 index 0000000000000..aeb2e25485230 --- /dev/null +++ b/src/bin/pg_amcheck/pyt/test_005_opclass_damage.py @@ -0,0 +1,50 @@ +# Copyright (c) 2022-2026, PostgreSQL Global Development Group + +# pylint: disable=line-too-long,too-many-statements +"""Port of src/bin/pg_amcheck/t/005_opclass_damage.pl. 
+ +pg_amcheck detects btree indexes whose ordering depends on a fickle (non-deterministic) operator class, and validates unique-constraint checking. +Generated from the Perl original via .agent/gen_golden.py. +""" + + +def test_005_opclass_damage(create_pg): + """pg_amcheck detects btree indexes whose ordering depends on a fickle (non-deterministic) operator class.""" + node = create_pg("test", start=False) + node.start() + node.safe_psql( + "CREATE EXTENSION amcheck;\n\n\tCREATE FUNCTION int4_asc_cmp (a int4, b int4) RETURNS int LANGUAGE sql AS $$\n\t\tSELECT CASE WHEN $1 = $2 THEN 0 WHEN $1 > $2 THEN 1 ELSE -1 END; $$;\n\n\tCREATE FUNCTION ok_cmp (int4, int4)\n\tRETURNS int LANGUAGE sql AS\n\t$$\n\t\tSELECT\n\t\t\tCASE WHEN $1 < $2 THEN -1\n\t\t\t\t WHEN $1 > $2 THEN 1\n\t\t\t\t ELSE 0\n\t\t\tEND;\n\t$$;\n\n\tCREATE OPERATOR CLASS int4_fickle_ops FOR TYPE int4 USING btree AS\n\t OPERATOR 1 < (int4, int4), OPERATOR 2 <= (int4, int4),\n\t OPERATOR 3 = (int4, int4), OPERATOR 4 >= (int4, int4),\n\t OPERATOR 5 > (int4, int4), FUNCTION 1 int4_asc_cmp(int4, int4);\n\n\tCREATE OPERATOR CLASS int4_unique_ops FOR TYPE int4 USING btree AS\n\t\tOPERATOR 1 < (int4, int4), OPERATOR 2 <= (int4, int4),\n\t\tOPERATOR 3 = (int4, int4), OPERATOR 4 >= (int4, int4),\n\t\tOPERATOR 5 > (int4, int4), FUNCTION 1 ok_cmp(int4, int4);\n\n\tCREATE TABLE int4tbl (i int4);\n\tINSERT INTO int4tbl (SELECT * FROM generate_series(1,1000) gs);\n\tCREATE INDEX fickleidx ON int4tbl USING btree (i int4_fickle_ops);\n\tCREATE UNIQUE INDEX bttest_unique_idx\n\t\t\t\t\t\tON int4tbl\n\t\t\t\t\t\tUSING btree (i int4_unique_ops)\n\t\t\t\t\t\tWITH (deduplicate_items = off);" + ) + node.command_like( + ["pg_amcheck", "--port", str(node.port), "postgres"], + r"""^$""", + "pg_amcheck all schemas, tables and indexes reports no corruption", + ) + node.safe_psql( + "CREATE FUNCTION int4_desc_cmp (int4, int4) RETURNS int LANGUAGE sql AS $$\n\t\tSELECT CASE WHEN $1 = $2 THEN 0 WHEN $1 > $2 THEN -1 ELSE 1 END; 
$$;\n\tUPDATE pg_catalog.pg_amproc\n\t\tSET amproc = 'int4_desc_cmp'::regproc\n\t\tWHERE amproc = 'int4_asc_cmp'::regproc" + ) + node.command_checks_all( + ["pg_amcheck", "--port", str(node.port), "postgres"], + 2, + [r'''item order invariant violated for index "fickleidx"'''], + [], + "pg_amcheck all schemas, tables and indexes reports fickleidx corruption", + ) + node.safe_psql( + "UPDATE pg_catalog.pg_amproc\n\t\tSET amproc = 'int4_asc_cmp'::regproc\n\t\tWHERE amproc = 'int4_desc_cmp'::regproc" + ) + node.command_like( + ["pg_amcheck", "--checkunique", "--port", str(node.port), "postgres"], + r"""^$""", + "pg_amcheck all schemas, tables and indexes reports no corruption", + ) + node.safe_psql( + "CREATE FUNCTION bad_cmp (int4, int4)\n\tRETURNS int LANGUAGE sql AS\n\t$$\n\t\tSELECT\n\t\t\tCASE WHEN ($1 = 768 AND $2 = 769) OR\n\t\t\t\t\t ($1 = 769 AND $2 = 768) THEN 0\n\t\t\t\t WHEN $1 < $2 THEN -1\n\t\t\t\t WHEN $1 > $2 THEN 1\n\t\t\t\t ELSE 0\n\t\t\tEND;\n\t$$;\n\n\tUPDATE pg_catalog.pg_amproc\n\t\tSET amproc = 'bad_cmp'::regproc\n\t\tWHERE amproc = 'ok_cmp'::regproc" + ) + node.command_checks_all( + ["pg_amcheck", "--checkunique", "--port", str(node.port), "postgres"], + 2, + [r'''index uniqueness is violated for index "bttest_unique_idx"'''], + [], + "pg_amcheck all schemas, tables and indexes reports bttest_unique_idx corruption", + ) diff --git a/src/bin/pg_archivecleanup/Makefile b/src/bin/pg_archivecleanup/Makefile index 93fd703f22591..2d23236a24e1c 100644 --- a/src/bin/pg_archivecleanup/Makefile +++ b/src/bin/pg_archivecleanup/Makefile @@ -31,6 +31,7 @@ clean distclean: check: $(prove_check) + $(pytest_check) installcheck: $(prove_installcheck) diff --git a/src/bin/pg_archivecleanup/meson.build b/src/bin/pg_archivecleanup/meson.build index 4527a3816b35c..c1ecd2259448a 100644 --- a/src/bin/pg_archivecleanup/meson.build +++ b/src/bin/pg_archivecleanup/meson.build @@ -26,6 +26,11 @@ tests += { 't/010_pg_archivecleanup.pl', ], }, + 'pytest': { + 'tests': [ + 
def _create_files(tempdir, walfiles):
    """Create each listed file in *tempdir* with placeholder content.

    *walfiles* is a sequence of ``(name, present)`` pairs; only the name is
    used here.
    """
    for filename, _ in walfiles:
        target = tempdir / filename
        target.write_text("CONTENT", encoding="utf-8")


def _remove_files(tempdir, walfiles):
    """Delete each listed file from *tempdir*; missing files are ignored."""
    for filename, _ in walfiles:
        target = tempdir / filename
        target.unlink(missing_ok=True)


def _run_check(pg_bin, tempdir, testdata, oldest_kept, test_name, *options):
    """Run pg_archivecleanup over a fresh set of files and verify the result.

    The files named in *testdata* are created in *tempdir*, pg_archivecleanup
    is run with *options* and *oldest_kept*, and each file is then checked
    against its ``present`` flag.  The files are removed again at the end.
    """
    _create_files(tempdir, testdata)

    argv = ["pg_archivecleanup"]
    argv.extend(options)
    argv.extend([str(tempdir), oldest_kept])
    pg_bin.command_ok(argv, "{}: runs".format(test_name))

    for walfile, should_remain in testdata:
        found = (tempdir / walfile).is_file()
        if not should_remain:
            assert not found, "{}:{} was cleaned up".format(test_name, walfile)
            continue
        assert found, "{}:{} was not cleaned up".format(test_name, walfile)

    _remove_files(tempdir, testdata)
def test_pg_archivecleanup(pg_bin, tmp_path):
    """Check pg_archivecleanup argument errors, dry run, and real cleanups."""
    prog = "pg_archivecleanup"
    archive = str(tmp_path)

    pg_bin.program_help_ok(prog)
    pg_bin.program_version_ok(prog)
    pg_bin.program_options_handling_ok(prog)

    # (extra arguments, expected error pattern, description)
    bad_invocations = (
        (
            [],
            r"must specify archive location",
            "fails if archive location is not specified",
        ),
        (
            [archive],
            r"must specify oldest kept WAL file",
            "fails if oldest kept WAL file name is not specified",
        ),
        (
            ["notexist", "foo"],
            r"archive location .* does not exist",
            "fails if archive location does not exist",
        ),
        (
            [archive, "foo", "bar"],
            r"too many command-line arguments",
            "fails with too many command-line arguments",
        ),
        (
            [archive, "foo"],
            r"invalid file name argument",
            "fails with invalid restart file name",
        ),
    )
    for extra_args, expected_error, description in bad_invocations:
        pg_bin.command_fails_like([prog] + extra_args, expected_error, description)

    # Dry run: no files are physically removed, but logs show what would be.
    _create_files(tmp_path, _WALFILES_VERBOSE)
    dry_run = pg_bin.result(
        [prog, "--debug", "--dry-run", archive, "00000001000000370000000E"]
    )
    assert dry_run.rc == 0, "pg_archivecleanup dry run: exit code 0"

    for walfile, kept in _WALFILES_VERBOSE:
        announced = re.search(
            r"{}.*would be removed".format(walfile), dry_run.stderr
        )
        description = "pg_archivecleanup dry run for {}: matches".format(walfile)
        # A kept file must not be announced; a removable one must be.
        if kept:
            assert not announced, description
        else:
            assert announced, description
    for walfile, _ in _WALFILES_VERBOSE:
        assert (tmp_path / walfile).is_file(), "{} not removed".format(walfile)
    _remove_files(tmp_path, _WALFILES_VERBOSE)

    # (file set, oldest kept WAL file, test name, option)
    scenarios = (
        (
            _WALFILES_WITH_GZ,
            "00000001000000370000000E",
            "pg_archivecleanup",
            "-x.gz",
        ),
        (
            _WALFILES_WITH_GZ,
            "00000001000000370000000E.partial",
            "pg_archivecleanup with .partial file",
            "-x.gz",
        ),
        (
            _WALFILES_WITH_GZ,
            "00000001000000370000000E.00000020.backup",
            "pg_archivecleanup with .backup file",
            "-x.gz",
        ),
        (
            _WALFILES_CLEAN_BACKUP_HISTORY,
            "00000001000000370000000E",
            "pg_archivecleanup with --clean-backup-history",
            "-b",
        ),
    )
    for walfiles, oldest_kept, label, option in scenarios:
        _run_check(pg_bin, tmp_path, walfiles, oldest_kept, label, option)
def test_pg_checksums_basic(pg_bin):
    """Run the standard help/version/options-handling checks on pg_checksums."""
    program = "pg_checksums"
    standard_checks = (
        "program_help_ok",
        "program_version_ok",
        "program_options_handling_ok",
    )
    for check_name in standard_checks:
        getattr(pg_bin, check_name)(program)
def _check_relation_corruption(pg_bin, node, table, tablespace):
    """Check that pg_checksums reports a corrupted page of *table*.

    Creates *table*, moves it to *tablespace*, stops the node and confirms the
    relation verifies cleanly.  It then corrupts block 0 via
    ``node.corrupt_page_checksum`` and expects ``pg_checksums --check`` to
    exit with status 1, both for the single relfilenode and for the whole
    cluster.  The table is dropped afterwards and the node is left running.
    """
    pgdata = node.datadir
    bad_checksums = r"Bad checksums:.*1"
    verification_failed = r"checksum verification failed"

    def check_argv(*extra):
        # Build a new list per call so no argv object is shared between calls.
        return ["pg_checksums", "--check", "--pgdata", pgdata, *extra]

    create_sql = (
        "CREATE TABLE {0} AS SELECT a FROM generate_series(1,10000) AS a;"
        " ALTER TABLE {0} SET (autovacuum_enabled=false);"
    )
    node.safe_psql(create_sql.format(table))
    node.safe_psql("ALTER TABLE {} SET TABLESPACE {};".format(table, tablespace))

    relpath = node.safe_psql("SELECT pg_relation_filepath('{}');".format(table))
    relfilenode = node.safe_psql(
        "SELECT relfilenode FROM pg_class WHERE relname = '{}';".format(table)
    )

    node.stop()

    # Nothing is corrupted yet, so the single relfilenode must verify cleanly.
    pg_bin.command_ok(
        check_argv("--filenode", relfilenode),
        "succeeds for single relfilenode on tablespace {} with offline cluster".format(
            tablespace
        ),
    )

    node.corrupt_page_checksum(relpath, 0)

    failure_cases = (
        (
            check_argv("--filenode", relfilenode),
            "fails with corrupted data for single relfilenode on tablespace {}",
        ),
        (
            check_argv(),
            "fails with corrupted data on tablespace {}",
        ),
    )
    for argv, description in failure_cases:
        node.command_checks_all(
            argv,
            1,
            [bad_checksums],
            [verification_failed],
            description.format(tablespace),
        )

    # Drop the damaged table; the cluster must then verify cleanly again.
    node.start()
    node.safe_psql("DROP TABLE {};".format(table))
    node.stop()
    node.command_ok(
        check_argv(),
        "succeeds again after table drop on tablespace {}".format(tablespace),
    )
    node.start()
def _setup_dummy_files(pgdata):
    """Populate ``pgdata/global`` with files the checksum scan should accept.

    This creates the empty, correctly-named files from ``_EMPTY_OK_FILES`` and
    a set of temporary or unrelated files holding dummy content.
    """
    global_dir = pgdata / "global"

    for empty_name in _EMPTY_OK_FILES:
        pypg.append_to_file(global_dir / empty_name, "")

    # Temporary files/folders with dummy contents, ignored by the scan.
    pypg.append_to_file(global_dir / "pgsql_tmp_123", "foo")
    tmp_dir = global_dir / "pgsql_tmp"
    tmp_dir.mkdir()
    ignored_files = (
        tmp_dir / "1.1",
        global_dir / "pg_internal.init",
        global_dir / "pg_internal.init.123",
    )
    for ignored in ignored_files:
        pypg.append_to_file(ignored, "foo")

    # Non-postgres macOS file, ignored by the scan (skip creating it on macOS).
    if darwin:
        return
    pypg.append_to_file(global_dir / ".DS_Store", "foo")
"--disable", "--no-sync", "--pgdata", pgdata], + "disabling checksums fails if already disabled", + ) + pg_bin.command_like( + ["pg_controldata", pgdata], + r"Data page checksum version:.*0", + "checksums disabled in control file", + ) + + pg_bin.command_ok( + ["pg_checksums", "--enable", "--no-sync", "--pgdata", pgdata], + "checksums successfully enabled in cluster", + ) + pg_bin.command_like( + ["pg_controldata", pgdata], + r"Data page checksum version:.*1", + "checksums enabled in control file", + ) + + pg_bin.command_ok( + ["pg_checksums", "--check", "--pgdata", pgdata], "succeeds with offline cluster" + ) + pg_bin.command_ok( + ["pg_checksums", "--pgdata", pgdata], "verifies checksums as default action" + ) + + pg_bin.command_fails( + ["pg_checksums", "--disable", "--filenode", "1234", "--pgdata", pgdata], + "fails when relfilenodes are requested and action is --disable", + ) + pg_bin.command_fails( + ["pg_checksums", "--enable", "--filenode", "1234", "--pgdata", pgdata], + "fails when relfilenodes are requested and action is --enable", + ) + + # postgres -C for an offline cluster (reports the GUC; server won't start). + pg_bin.command_checks_all( + [ + "pg_ctl", + "start", + "--silent", + "--pgdata", + pgdata, + "-o", + "-C data_checksums -c log_min_messages=fatal", + ], + 1, + [r"^on$"], + [r"could not start server"], + "data_checksums=on is reported on an offline cluster", + ) + + node.start() + pg_bin.command_fails( + ["pg_checksums", "--check", "--pgdata", pgdata], "fails with online cluster" + ) + + _check_relation_corruption(pg_bin, node, "corrupt1", "pg_default") + + tablespace_dir = node.basedir / "ts_corrupt_dir" + tablespace_dir.mkdir() + node.safe_psql("CREATE TABLESPACE ts_corrupt LOCATION '{}';".format(tablespace_dir)) + _check_relation_corruption(pg_bin, node, "corrupt2", "ts_corrupt") + + node.stop() + + # A foreign tablespace location must not be scanned. 
def test_pg_config(pg_bin):
    """Check pg_config's standard option handling and the shape of its output."""
    prog = "pg_config"

    pg_bin.program_help_ok(prog)
    pg_bin.program_version_ok(prog)
    pg_bin.program_options_handling_ok(prog)

    # (options, expected stdout pattern, description)
    output_checks = (
        # XXX might be wrong
        (["--bindir"], r"bin", "pg_config single option"),
        (["--bindir", "--libdir"], r"bin.*\n.*lib", "pg_config two options"),
        (
            ["--libdir", "--bindir"],
            r"lib.*\n.*bin",
            "pg_config two options different order",
        ),
        ([], r".*\n.*\n.*", "pg_config without options prints many lines"),
    )
    for options, expected, description in output_checks:
        pg_bin.command_like([prog] + options, expected, description)
def test_pg_controldata(pg_bin, create_pg):
    """Check pg_controldata arguments, normal output, and a damaged pg_control."""
    prog = "pg_controldata"

    for check_name in (
        "program_help_ok",
        "program_version_ok",
        "program_options_handling_ok",
    ):
        getattr(pg_bin, check_name)(prog)
    pg_bin.command_fails([prog], "pg_controldata without arguments fails")
    pg_bin.command_fails(
        [prog, "nonexistent"],
        "pg_controldata with nonexistent directory fails",
    )

    node = create_pg("main", start=False)

    pg_bin.command_like(
        [prog, node.datadir], r"checkpoint", "pg_controldata produces output"
    )

    # Corrupt pg_control by overwriting most of it with zeros.  The first 16
    # bytes (pg_control version number) are left intact so we get a checksum
    # mismatch rather than a version-number error.
    control_file = node.datadir / "global" / "pg_control"
    zero_fill = b"\x00" * (control_file.stat().st_size - 16)
    with open(control_file, "r+b") as handle:
        handle.seek(16)
        handle.write(zero_fill)

    expected_warnings = [
        r"warning: calculated CRC checksum does not match value stored in "
        r"control file",
        r"warning: invalid WAL segment size",
    ]
    pg_bin.command_checks_all(
        [prog, node.datadir],
        0,
        [r"."],
        expected_warnings,
        "pg_controldata with corrupted pg_control",
    )
def test_start_stop(pg_bin, tmp_path, sockdir):
    """pg_ctl start/stop/restart and the resulting file permissions.

    Builds a data directory under *tmp_path* with ``pg_ctl initdb``, appends a
    minimal configuration to postgresql.conf, and then drives pg_ctl through
    start, stop and restart.  On non-Windows systems it also checks the
    directory/file modes of the data directory for the default (0700/0600)
    and group-access (0750/0640) cases.

    The test is skipped when the PG_REGRESS environment variable is not set,
    because pg_regress is needed to configure authentication.
    """
    pg_regress = os.environ.get("PG_REGRESS")
    if not pg_regress:
        pytest.skip("PG_REGRESS environment variable is not set")

    pg_bin.program_help_ok("pg_ctl")
    pg_bin.program_version_ok("pg_ctl")
    pg_bin.program_options_handling_ok("pg_ctl")

    # Starting against a directory that does not exist must exit with 1.
    pg_bin.command_exit_is(
        ["pg_ctl", "start", "--pgdata", tmp_path / "nonexistent"],
        1,
        "pg_ctl start with nonexistent directory",
    )

    data = tmp_path / "data"
    pg_bin.command_ok(
        ["pg_ctl", "initdb", "--pgdata", data, "--options", "--no-sync"],
        "pg_ctl initdb",
    )
    pg_bin.command_ok([pg_regress, "--config-auth", data], "configure authentication")

    # Append the test settings to the postgresql.conf written by initdb.
    node_port = pypg.get_free_port()
    with open(data / "postgresql.conf", "a", encoding="utf-8") as conf:
        conf.write("fsync = off\n")
        conf.write("port = {}\n".format(node_port))
        # Optional extra configuration supplied through TEMP_CONFIG.
        temp_config = os.environ.get("TEMP_CONFIG")
        if temp_config:
            conf.write(pypg.slurp_file(temp_config))
        # Unix sockets only (in sockdir) where available, else TCP loopback.
        if use_unix_sockets:
            conf.write("listen_addresses = ''\n")
            conf.write("unix_socket_directories = '{}'\n".format(sockdir))
        else:
            conf.write("listen_addresses = '127.0.0.1'\n")

    log_path = tmp_path / "001_start_stop_server.log"
    pg_bin.command_like(
        ["pg_ctl", "start", "--pgdata", data, "--log", log_path],
        r"(?s)done.*server started",
        "pg_ctl start",
    )

    # A second start, and a second stop, must each fail.
    pg_bin.command_fails(
        ["pg_ctl", "start", "--pgdata", data], "second pg_ctl start fails"
    )
    pg_bin.command_ok(["pg_ctl", "stop", "--pgdata", data], "pg_ctl stop")
    pg_bin.command_fails(
        ["pg_ctl", "stop", "--pgdata", data], "second pg_ctl stop fails"
    )

    # Log file for default permission test.
    log_file_name = data / "perm-test-600.log"
    pg_bin.command_ok(
        ["pg_ctl", "restart", "--pgdata", data, "--log", log_file_name],
        "pg_ctl restart with server not running",
    )

    # Permissions on log file should be default.
    if not windows_os:
        assert log_file_name.is_file()
        assert pypg.check_mode_recursive(data, 0o700, 0o600)

    # Log file for group access test.
    log_file_name = data / "perm-test-640.log"
    if not windows_os:
        # Stop, then change the data dir mode so the log file will be created
        # with group read privileges on the next start.
        pg_bin.command_ok(["pg_ctl", "stop", "--pgdata", data])
        pypg.chmod_recursive(data, 0o750, 0o640)

        pg_bin.command_ok(
            ["pg_ctl", "start", "--pgdata", data, "--log", log_file_name],
            "start server to check group permissions",
        )

        assert log_file_name.is_file()
        assert pypg.check_mode_recursive(data, 0o750, 0o640)

    # The server is running at this point: the group-access branch started it
    # on non-Windows systems, and the earlier restart did on Windows.
    pg_bin.command_ok(
        ["pg_ctl", "restart", "--pgdata", data, "--log", log_file_name],
        "pg_ctl restart with server running",
    )

    # Leave no server running behind the test.
    pg_bin.command_ok(["pg_ctl", "stop", "--pgdata", data])
def test_status(pg_bin, create_pg, tmp_path):
    """Check pg_ctl status exit codes: 4 (no directory), 3 (stopped), 0 (running)."""

    def expect_status(datadir, exit_code, description):
        pg_bin.command_exit_is(
            ["pg_ctl", "status", "--pgdata", datadir], exit_code, description
        )

    expect_status(
        tmp_path / "nonexistent", 4, "pg_ctl status with nonexistent directory"
    )

    node = create_pg("main", start=False)
    expect_status(node.datadir, 3, "pg_ctl status with server not running")

    node.start()
    expect_status(node.datadir, 0, "pg_ctl status with server running")

    node.stop()
+""" + + +def test_promote(pg_bin, create_pg, tmp_path): + """pg_ctl promote against bad targets fails; promoting a standby works.""" + pg_bin.command_fails_like( + ["pg_ctl", "--pgdata", tmp_path / "nonexistent", "promote"], + r"directory .* does not exist", + "pg_ctl promote with nonexistent directory", + ) + + primary = create_pg("primary", allows_streaming=True, start=False) + + pg_bin.command_fails_like( + ["pg_ctl", "--pgdata", primary.datadir, "promote"], + r"PID file .* does not exist", + "pg_ctl promote of not running instance fails", + ) + + primary.start() + + pg_bin.command_fails_like( + ["pg_ctl", "--pgdata", primary.datadir, "promote"], + r"not in standby mode", + "pg_ctl promote of primary instance fails", + ) + + primary.backup("my_backup") + standby = create_pg( + "standby", from_backup=(primary, "my_backup"), has_streaming=True + ) + + assert standby.safe_psql("SELECT pg_is_in_recovery()") == "t", "standby in recovery" + + pg_bin.command_ok( + ["pg_ctl", "--pgdata", standby.datadir, "--no-wait", "promote"], + "pg_ctl --no-wait promote of standby runs", + ) + + assert standby.poll_query_until( + "SELECT NOT pg_is_in_recovery()" + ), "promoted standby is not in recovery" + + # Same again with the default wait option. + standby2 = create_pg( + "standby2", from_backup=(primary, "my_backup"), has_streaming=True + ) + + assert ( + standby2.safe_psql("SELECT pg_is_in_recovery()") == "t" + ), "standby is in recovery" + + pg_bin.command_ok( + ["pg_ctl", "--pgdata", standby2.datadir, "promote"], + "pg_ctl promote of standby runs", + ) + + # No wait here: the default promote already waited. 
def _max_attempts():
    """Return the number of polling attempts allowed by the test timeout.

    The timeout in seconds is read from PG_TEST_TIMEOUT_DEFAULT and multiplied
    by ten, since callers sleep 0.1 seconds between attempts.  An unset or
    empty variable means the default of 180 seconds; an empty value must not
    reach int(), which would raise ValueError.
    """
    timeout = os.environ.get("PG_TEST_TIMEOUT_DEFAULT") or "180"
    return 10 * int(timeout)


def _fetch_file_name(logfiles, fmt):
    """Return the file name recorded for a destination in current_logfiles.

    *logfiles* is the text of current_logfiles and *fmt* is the destination
    name at the start of an entry (e.g. ``stderr``).  Returns ``None`` when no
    line matches; if several lines match, the last one wins.
    """
    entry_re = re.compile(r"{} (.*)$".format(fmt))
    filename = None
    for line in logfiles.splitlines():
        match = entry_re.search(line)
        if match:
            filename = match.group(1)
    return filename


def _check_log_pattern(fmt, logfiles, pattern, node):
    """Assert pattern appears in the fmt log file and pg_current_logfile agrees.

    The log file is re-read every 0.1 seconds, for at most ``_max_attempts()``
    tries, until *pattern* is found.
    """
    lfname = _fetch_file_name(logfiles, fmt)
    # Fail with a clear message rather than a TypeError from "datadir / None".
    assert lfname is not None, "current_logfiles has an entry for {}".format(fmt)

    log_path = node.datadir / lfname
    contents = ""
    for _ in range(_max_attempts()):
        contents = pypg.slurp_file(log_path)
        if re.search(pattern, contents):
            break
        time.sleep(0.1)

    assert re.search(
        pattern, contents
    ), "found expected log file content for {}".format(fmt)

    # While we're at it, test the pg_current_logfile() function.
    assert (
        node.safe_psql("SELECT pg_current_logfile('{}')".format(fmt)) == lfname
    ), "pg_current_logfile() gives correct answer with {}".format(fmt)


def _wait_for_current_logfiles(node, differs_from=None):
    """Slurp current_logfiles, retrying until it exists (and optionally changes).

    When *differs_from* is given, polling continues until the content differs
    from it.  If the attempts run out, the last content read is returned (the
    empty string if the file never appeared); callers validate the result.
    """
    path = node.datadir / "current_logfiles"
    contents = ""
    for _ in range(_max_attempts()):
        try:
            contents = pypg.slurp_file(path)
        except FileNotFoundError:
            # Not written yet; try again shortly.
            time.sleep(0.1)
            continue
        if differs_from is None or contents != differs_from:
            return contents
        time.sleep(0.1)
    return contents
+ node.bin.result(["psql", "-c", "fee fi fo fum"]) + + _check_log_pattern("stderr", new_current_logfiles, "syntax error", node) + _check_log_pattern("csvlog", new_current_logfiles, "syntax error", node) + _check_log_pattern("jsonlog", new_current_logfiles, "syntax error", node) + + node.stop() diff --git a/src/bin/pg_resetwal/Makefile b/src/bin/pg_resetwal/Makefile index 7113acbef2fb6..3832d3bfde2f4 100644 --- a/src/bin/pg_resetwal/Makefile +++ b/src/bin/pg_resetwal/Makefile @@ -41,6 +41,7 @@ clean distclean: check: $(prove_check) + $(pytest_check) installcheck: $(prove_installcheck) diff --git a/src/bin/pg_resetwal/meson.build b/src/bin/pg_resetwal/meson.build index c2607767b511f..feccce1af30f3 100644 --- a/src/bin/pg_resetwal/meson.build +++ b/src/bin/pg_resetwal/meson.build @@ -27,6 +27,12 @@ tests += { 't/002_corrupted.pl', ], }, + 'pytest': { + 'tests': [ + 'pyt/test_001_basic.py', + 'pyt/test_002_corrupted.py', + ], + }, } subdir('po', if_found: libintl) diff --git a/src/bin/pg_resetwal/pyt/test_001_basic.py b/src/bin/pg_resetwal/pyt/test_001_basic.py new file mode 100644 index 0000000000000..8ec533e48f998 --- /dev/null +++ b/src/bin/pg_resetwal/pyt/test_001_basic.py @@ -0,0 +1,189 @@ +# Copyright (c) 2021-2026, PostgreSQL Global Development Group + +"""Port of src/bin/pg_resetwal/t/001_basic.pl.""" + +import os +import platform +import re + +import pypg + +windows_os = platform.system() == "Windows" + +# (args, stderr_pattern, test_name) for option-validation failures. 
+_OPTION_ERRORS = [ + (["-c", "foo"], r"error: invalid argument for option -c", "incorrect -c option"), + (["-c", "10,bar"], r"error: invalid argument for option -c", "incorrect -c part 2"), + (["-c", "1,10"], r"greater than", "-c ids value 1 part 1"), + (["-c", "10,1"], r"greater than", "-c value 1 part 2"), + (["-e", "foo"], r"error: invalid argument for option -e", "incorrect -e option"), + (["-e", "-1"], r"error: invalid argument for option -e", "-e value -1"), + (["-l", "foo"], r"error: invalid argument for option -l", "incorrect -l option"), + (["-m", "foo"], r"error: invalid argument for option -m", "incorrect -m option"), + (["-m", "10,bar"], r"error: invalid argument for option -m", "incorrect -m part 2"), + (["-m", "0,10"], r"must not be 0", "-m value 0 in the first part"), + (["-m", "10,0"], r"must not be 0", "-m value 0 in the second part"), + (["-o", "foo"], r"error: invalid argument for option -o", "incorrect -o option"), + (["-o", "0"], r"must not be 0", "-o value 0"), + (["-O", "foo"], r"error: invalid argument for option -O", "incorrect -O option"), + (["-O", "-1"], r"error: invalid argument for option -O", "-O value -1"), + (["--wal-segsize", "foo"], r"error: invalid value", "incorrect --wal-segsize"), + (["--wal-segsize", "13"], r"must be a power", "invalid --wal-segsize value"), + (["-u", "foo"], r"error: invalid argument for option -u", "incorrect -u option"), + (["-u", "1"], r"must be greater than", "-u value too small"), + (["-x", "foo"], r"error: invalid argument for option -x", "incorrect -x option"), + (["-x", "1"], r"must be greater than", "-x value too small"), + (["-x", "-1"], r"error: invalid argument for option -x", "-x value -1"), + (["-x", "-100"], r"error: invalid argument for option -x", "negative -x value"), + (["-x", "10000000000"], r"error: invalid argument for option -x", "-x too large"), + ( + ["--char-signedness", "foo"], + r"error: invalid argument for option --char-signedness", + "incorrect --char-signedness option", + ), 
def test_pg_resetwal(pg_bin, create_pg):
    """Check pg_resetwal dry run, permissions, real runs, and option handling."""
    prog = "pg_resetwal"

    pg_bin.program_help_ok(prog)
    pg_bin.program_version_ok(prog)
    pg_bin.program_options_handling_ok(prog)

    node = create_pg("main", start=False)
    node.append_conf("track_commit_timestamp = on")

    pg_bin.command_like(
        [prog, "-n", node.datadir], r"checkpoint", "pg_resetwal -n produces output"
    )

    if not windows_os:
        modes_ok = pypg.check_mode_recursive(node.datadir, 0o700, 0o600)
        assert modes_ok, "check PGDATA permissions"

    pg_bin.command_ok([prog, "--pgdata", node.datadir], "pg_resetwal runs")
    node.start()
    answer = node.safe_psql("SELECT 1;")
    assert answer == "1", "server running and working after reset"

    # pg_resetwal must refuse to touch a running cluster.
    pg_bin.command_fails_like(
        [prog, node.datadir], r"lock file .* exists", "fails if server running"
    )

    # After an immediate shutdown it must refuse too, unless forced.
    node.stop("immediate")
    pg_bin.command_fails_like(
        [prog, node.datadir],
        r"database server was not shut down cleanly",
        "does not run after immediate shutdown",
    )
    pg_bin.command_ok(
        [prog, "--force", node.datadir],
        "runs after immediate shutdown with force",
    )
    node.start()
    answer = node.safe_psql("SELECT 1;")
    assert answer == "1", "server running and working after forced reset"
    node.stop()

    _test_option_errors(pg_bin, node)
    _test_control_overrides(pg_bin, node)

    node.start()
_OPTION_ERRORS: + pg_bin.command_fails_like( + ["pg_resetwal", *args, node.datadir], + pattern, + "fails with {}".format(name), + ) + + +def _slru_files(node, subdir): + entries = os.listdir(node.datadir / subdir) + return sorted(f for f in entries if re.search(r"[0-9A-F]+", f)) + + +def _test_control_overrides(pg_bin, node): + out = pg_bin.result(["pg_resetwal", "--dry-run", node.datadir]).stdout + match = re.search(r"^Database block size: *(\d+)$", out, re.M) + assert match + blcksz = int(match.group(1)) + + cmd = [ + "pg_resetwal", + "--pgdata", + node.datadir, + "--epoch", + "1", + "--next-wal-file", + "00000001000000320000004B", + "--next-oid", + "100000", + "--wal-segsize", + "1", + ] + + files = _slru_files(node, "pg_commit_ts") + cmd += [ + "--commit-timestamp-ids", + "{},{}".format( + 3 if int(files[0], 16) == 0 else int(files[0], 16), int(files[-1], 16) + ), + ] + + files = _slru_files(node, "pg_multixact/offsets") + mult = 32 * blcksz // 8 + cmd += [ + "--multixact-ids", + "{},{}".format( + (int(files[-1], 16) + 1) * mult, + 1 if int(files[0], 16) == 0 else int(files[0], 16) * mult, + ), + ] + + files = _slru_files(node, "pg_multixact/members") + mult = 32 * int(blcksz / 20) * 4 + cmd += ["--multixact-offset", str((int(files[-1], 16) + 1) * mult)] + + files = _slru_files(node, "pg_xact") + mult = 32 * blcksz * 4 + cmd += [ + "--oldest-transaction-id", + str(3 if int(files[0], 16) == 0 else int(files[0], 16) * mult), + "--next-transaction-id", + str((int(files[-1], 16) + 1) * mult), + ] + + pg_bin.command_ok( + [*cmd, "--dry-run"], "runs with control override options, dry run" + ) + pg_bin.command_ok(cmd, "runs with control override options") + pg_bin.command_like( + ["pg_resetwal", "--dry-run", node.datadir], + r"(?m)^Latest checkpoint's NextOID: *100000$", + "spot check that control changes were applied", + ) diff --git a/src/bin/pg_resetwal/pyt/test_002_corrupted.py b/src/bin/pg_resetwal/pyt/test_002_corrupted.py new file mode 100644 index 
0000000000000..36d52d2dbfaa1 --- /dev/null +++ b/src/bin/pg_resetwal/pyt/test_002_corrupted.py @@ -0,0 +1,57 @@ +# Copyright (c) 2021-2026, PostgreSQL Global Development Group + +"""Port of src/bin/pg_resetwal/t/002_corrupted.pl. + +Tests handling of a corrupted pg_control. +""" + + +def test_corrupted(pg_bin, create_pg): + """pg_resetwal copes with an all-zero and a partially-zeroed pg_control.""" + node = create_pg("main", start=False) + + pg_control = node.datadir / "global" / "pg_control" + size = pg_control.stat().st_size + + # Read the head of the file to get PG_CONTROL_VERSION in particular. + data = pg_control.read_bytes()[:16] + + # Fill pg_control with zeros. + pg_control.write_bytes(b"\x00" * size) + + pg_bin.command_checks_all( + ["pg_resetwal", "--dry-run", node.datadir], + 0, + [r"pg_control version number"], + [ + r"pg_resetwal: warning: pg_control exists but is broken or wrong " + r"version; ignoring it" + ], + "processes corrupted pg_control all zeroes", + ) + + # Put back the saved header. This uses a different code path internally, + # allowing a zero WAL segment size to be processed. + pg_control.write_bytes(data + b"\x00" * (size - 16)) + + pg_bin.command_checks_all( + ["pg_resetwal", "--dry-run", node.datadir], + 0, + [r"pg_control version number"], + [ + r"pg_resetwal: warning: pg_control specifies invalid WAL segment " + r"size \(0 bytes\); proceed with caution" + ], + "processes zero WAL segment size", + ) + + # Now try to run it for real. 
+ pg_bin.command_fails_like( + ["pg_resetwal", node.datadir], + r"not proceeding because control file values were guessed", + "does not run when control file values were guessed", + ) + pg_bin.command_ok( + ["pg_resetwal", "--force", node.datadir], + "runs with force when control file values were guessed", + ) diff --git a/src/bin/pg_test_fsync/Makefile b/src/bin/pg_test_fsync/Makefile index 4c5e518125033..6cb875513e587 100644 --- a/src/bin/pg_test_fsync/Makefile +++ b/src/bin/pg_test_fsync/Makefile @@ -24,6 +24,7 @@ installdirs: check: $(prove_check) + $(pytest_check) installcheck: $(prove_installcheck) diff --git a/src/bin/pg_test_fsync/meson.build b/src/bin/pg_test_fsync/meson.build index f14793d665ab0..7a8a02143d2ea 100644 --- a/src/bin/pg_test_fsync/meson.build +++ b/src/bin/pg_test_fsync/meson.build @@ -26,6 +26,11 @@ tests += { 't/001_basic.pl', ], }, + 'pytest': { + 'tests': [ + 'pyt/test_001_basic.py', + ], + }, } subdir('po', if_found: libintl) diff --git a/src/bin/pg_test_fsync/pyt/test_001_basic.py b/src/bin/pg_test_fsync/pyt/test_001_basic.py new file mode 100644 index 0000000000000..cda2e600919e6 --- /dev/null +++ b/src/bin/pg_test_fsync/pyt/test_001_basic.py @@ -0,0 +1,21 @@ +# Copyright (c) 2021-2026, PostgreSQL Global Development Group + +"""Port of src/bin/pg_test_fsync/t/001_basic.pl.""" + + +def test_pg_test_fsync(pg_bin): + """pg_test_fsync option validation.""" + pg_bin.program_help_ok("pg_test_fsync") + pg_bin.program_version_ok("pg_test_fsync") + pg_bin.program_options_handling_ok("pg_test_fsync") + + pg_bin.command_fails_like( + ["pg_test_fsync", "--secs-per-test", "a"], + r"pg_test_fsync: error: invalid argument for option --secs-per-test", + "pg_test_fsync: invalid argument for option --secs-per-test", + ) + pg_bin.command_fails_like( + ["pg_test_fsync", "--secs-per-test", "0"], + r"pg_test_fsync: error: --secs-per-test must be in range 1\.\.4294967295", + "pg_test_fsync: --secs-per-test must be in range", + ) diff --git 
a/src/bin/pg_test_timing/Makefile b/src/bin/pg_test_timing/Makefile index 7f677edadb30f..ec93bc5f88d6e 100644 --- a/src/bin/pg_test_timing/Makefile +++ b/src/bin/pg_test_timing/Makefile @@ -24,6 +24,7 @@ installdirs: check: $(prove_check) + $(pytest_check) installcheck: $(prove_installcheck) diff --git a/src/bin/pg_test_timing/meson.build b/src/bin/pg_test_timing/meson.build index 89f31fa95299c..a344762bee003 100644 --- a/src/bin/pg_test_timing/meson.build +++ b/src/bin/pg_test_timing/meson.build @@ -26,6 +26,11 @@ tests += { 't/001_basic.pl', ], }, + 'pytest': { + 'tests': [ + 'pyt/test_001_basic.py', + ], + }, } subdir('po', if_found: libintl) diff --git a/src/bin/pg_test_timing/pyt/test_001_basic.py b/src/bin/pg_test_timing/pyt/test_001_basic.py new file mode 100644 index 0000000000000..daa0b7c382541 --- /dev/null +++ b/src/bin/pg_test_timing/pyt/test_001_basic.py @@ -0,0 +1,40 @@ +# Copyright (c) 2021-2026, PostgreSQL Global Development Group + +"""Port of src/bin/pg_test_timing/t/001_basic.pl.""" + +import re + + +def test_pg_test_timing(pg_bin): + """pg_test_timing option validation and a basic run.""" + pg_bin.program_help_ok("pg_test_timing") + pg_bin.program_version_ok("pg_test_timing") + pg_bin.program_options_handling_ok("pg_test_timing") + + pg_bin.command_fails_like( + ["pg_test_timing", "--duration", "a"], + r"pg_test_timing: invalid argument for option --duration", + "pg_test_timing: invalid argument for option --duration", + ) + pg_bin.command_fails_like( + ["pg_test_timing", "--duration", "0"], + r"pg_test_timing: --duration must be in range 1\.\.4294967295", + "pg_test_timing: --duration must be in range", + ) + pg_bin.command_fails_like( + ["pg_test_timing", "--cutoff", "101"], + r"pg_test_timing: --cutoff must be in range 0\.\.100", + "pg_test_timing: --cutoff must be in range", + ) + + # We can't check for specific output, but a simple run should produce + # the expected headers. 
+ result = pg_bin.result(["pg_test_timing", "--duration", "1"]) + assert result.rc == 0, "pg_test_timing: exit code 0" + assert result.stderr == "", "pg_test_timing: no stderr" + assert re.search( + r"(?s)Testing timing overhead for 1 second\..*" + r"Histogram of timing durations:.*" + r"Observed timing durations up to 99\.9900%:", + result.stdout, + ), "pg_test_timing: stdout passes sanity check" diff --git a/src/bin/pg_waldump/meson.build b/src/bin/pg_waldump/meson.build index 5296f21b82c7f..ee95ad0af957a 100644 --- a/src/bin/pg_waldump/meson.build +++ b/src/bin/pg_waldump/meson.build @@ -36,6 +36,13 @@ tests += { 't/002_save_fullpage.pl', ], }, + 'pytest': { + 'env': {'TAR': tar.found() ? tar.full_path() : ''}, + 'tests': [ + 'pyt/test_001_basic.py', + 'pyt/test_002_save_fullpage.py', + ], + }, } subdir('po', if_found: libintl) diff --git a/src/bin/pg_waldump/pyt/test_001_basic.py b/src/bin/pg_waldump/pyt/test_001_basic.py new file mode 100644 index 0000000000000..d61e770c14dcc --- /dev/null +++ b/src/bin/pg_waldump/pyt/test_001_basic.py @@ -0,0 +1,473 @@ +# Copyright (c) 2021-2026, PostgreSQL Global Development Group + +"""Port of src/bin/pg_waldump/t/001_basic.pl. + +Basic pg_waldump tests: option/argument validation, the rmgr list, and decoding +of a range of WAL records (including a contrecord spanning WAL segments) read +both from a live data directory and from tar archives (none/gzip). +""" + +import os +import random +import re +import shutil +import struct +import subprocess + +_RMGR_LIST = """\ +XLOG +Transaction +Storage +CLOG +Database +Tablespace +MultiXact +RelMap +Standby +Heap2 +Heap +Btree +Hash +Gin +Gist +Sequence +SPGist +BRIN +CommitTs +ReplicationOrigin +Generic +LogicalMessage +XLOG2""" + +# Schema/workload exercising heap, btree, hash, sequence, abort, unlogged/init +# fork, gin, gist, spgist, brin, vacuum, logical message, relmap, and database +# records. 
+_WORKLOAD = """\ +-- heap, btree, hash, sequence +CREATE TABLE t1 (a int GENERATED ALWAYS AS IDENTITY, b text); +CREATE INDEX i1a ON t1 USING btree (a); +CREATE INDEX i1b ON t1 USING hash (b); +INSERT INTO t1 VALUES (default, 'one'), (default, 'two'); +DELETE FROM t1 WHERE b = 'one'; +TRUNCATE t1; + +-- abort +START TRANSACTION; +INSERT INTO t1 VALUES (default, 'three'); +ROLLBACK; + +-- unlogged/init fork +CREATE UNLOGGED TABLE t2 (x int); +CREATE INDEX i2 ON t2 USING btree (x); +INSERT INTO t2 SELECT generate_series(1, 10); + +-- gin +CREATE TABLE gin_idx_tbl (id bigserial PRIMARY KEY, data jsonb); +CREATE INDEX gin_idx ON gin_idx_tbl USING gin (data); +INSERT INTO gin_idx_tbl + WITH random_json AS ( + SELECT json_object_agg(key, trunc(random() * 10)) as json_data + FROM unnest(array['a', 'b', 'c']) as u(key)) + SELECT generate_series(1,500), json_data FROM random_json; + +-- gist, spgist +CREATE TABLE gist_idx_tbl (p point); +CREATE INDEX gist_idx ON gist_idx_tbl USING gist (p); +CREATE INDEX spgist_idx ON gist_idx_tbl USING spgist (p); +INSERT INTO gist_idx_tbl (p) VALUES (point '(1, 1)'), (point '(3, 2)'), (point '(6, 3)'); + +-- brin +CREATE TABLE brin_idx_tbl (col1 int, col2 text, col3 text ); +CREATE INDEX brin_idx ON brin_idx_tbl USING brin (col1, col2, col3) WITH (autosummarize=on); +INSERT INTO brin_idx_tbl SELECT generate_series(1, 10000), 'dummy', 'dummy'; +UPDATE brin_idx_tbl SET col2 = 'updated' WHERE col1 BETWEEN 1 AND 5000; +SELECT brin_summarize_range('brin_idx', 0); +SELECT brin_desummarize_range('brin_idx', 0); + +VACUUM; + +-- logical message +SELECT pg_logical_emit_message(true, 'foo', 'bar'); + +-- relmap +VACUUM FULL pg_authid; + +-- database +CREATE DATABASE d1; +DROP DATABASE d1; +""" + +# Consume remaining room in the current WAL segment, leaving space enough only +# for the start of a largish record (sets up a contrecord that spans segments). 
+_FILL_SEGMENT = """\ +DO $$ +DECLARE + wal_segsize int := setting::int FROM pg_settings WHERE name = 'wal_segment_size'; + remain int; + iters int := 0; +BEGIN + LOOP + INSERT into t1(b) + select repeat(encode(sha256(g::text::bytea), 'hex'), (random() * 15 + 1)::int) + from generate_series(1, 10) g; + + remain := wal_segsize - (pg_current_wal_insert_lsn() - '0/0') % wal_segsize; + IF remain < 2 * setting::int from pg_settings where name = 'block_size' THEN + RAISE log 'exiting after % iterations, % bytes to end of WAL segment', iters, remain; + EXIT; + END IF; + iters := iters + 1; + END LOOP; +END +$$; +""" + + +def _tar_portability_options(tar): + """Return tar flags forcing a readable ustar archive (cf. Utils helper).""" + if not tar: + return [] + devnull = os.devnull + ustar = subprocess.run( + [tar, "--format=ustar", "--owner=0", "--group=0", "-cf", devnull, devnull], + stdout=subprocess.DEVNULL, + stderr=subprocess.DEVNULL, + check=False, + ) + if ustar.returncode == 0: + return ["--format=ustar", "--owner=0", "--group=0"] + bsd = subprocess.run( + [tar, "-F", "ustar", "-cf", devnull, devnull], + stdout=subprocess.DEVNULL, + stderr=subprocess.DEVNULL, + check=False, + ) + if bsd.returncode == 0: + return ["-F", "ustar"] + return [] + + +def _generate_archive(tar, tar_p_flags, archive, directory, compression_flags): + """Create a tar archive of directory's entries in a shuffled order.""" + files = [e for e in os.listdir(directory) if e not in (".", "..")] + random.shuffle(files) + # tar is invoked from inside the WAL directory so the archived members are + # stored with bare names (mirrors the Perl chdir before command_ok). 
+ result = subprocess.run( + [tar, *tar_p_flags, compression_flags, archive, *files], + cwd=directory, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + check=False, + ) + assert result.returncode == 0, "tar archive created: {}".format( + result.stderr.decode("utf-8", "replace") + ) + + +def _run_waldump(pg_bin, *args): + """Run pg_waldump capturing (rc, stdout, stderr); never raises.""" + return pg_bin.result(["pg_waldump", *args]) + + +def _test_pg_waldump_skip_bytes(pg_bin, path, startlsn, endlsn): + """Starting one byte past a record boundary prints a 'skipping' message.""" + part1, part2 = startlsn.split("/") + new_start = "{}/{:X}".format(part1, int(part2, 16) + 1) + result = _run_waldump(pg_bin, "--start", new_start, "--end", endlsn, "--path", path) + assert result.rc == 0, "runs with start segment and start LSN specified" + assert re.search(r"first record is after", result.stderr), "info message printed" + + +def _test_pg_waldump(pg_bin, path, startlsn, endlsn, *opts): + """Run pg_waldump over a range; assert clean run and return stdout lines.""" + result = _run_waldump( + pg_bin, "--start", startlsn, "--end", endlsn, "--path", path, *opts + ) + assert result.rc == 0, "pg_waldump {}: runs ok".format(" ".join(opts)) + assert result.stderr == "", "pg_waldump {}: no stderr".format(" ".join(opts)) + lines = result.stdout.split("\n") + if lines and lines[-1] == "": + lines.pop() + assert len(lines) > 0, "pg_waldump {}: some lines are output".format(" ".join(opts)) + return lines + + +def _basic_option_checks(pg_bin): + """Help/version/option handling plus argument and option-value errors.""" + pg_bin.program_help_ok("pg_waldump") + pg_bin.program_version_ok("pg_waldump") + pg_bin.program_options_handling_ok("pg_waldump") + + pg_bin.command_fails_like(["pg_waldump"], r"error: no arguments", "no arguments") + pg_bin.command_fails_like( + ["pg_waldump", "foo", "bar", "baz"], + r"error: too many command-line arguments", + "too many arguments", + ) + 
pg_bin.command_fails_like( + ["pg_waldump", "--block", "bad"], + r"error: invalid block number", + "invalid block number", + ) + pg_bin.command_fails_like( + ["pg_waldump", "--fork", "bad"], + r"error: invalid fork name", + "invalid fork name", + ) + pg_bin.command_fails_like( + ["pg_waldump", "--limit", "bad"], r"error: invalid value", "invalid limit" + ) + pg_bin.command_fails_like( + ["pg_waldump", "--relation", "bad"], + r"error: invalid relation", + "invalid relation specification", + ) + pg_bin.command_fails_like( + ["pg_waldump", "--rmgr", "bad"], + r"error: resource manager .* does not exist", + "invalid rmgr name", + ) + pg_bin.command_fails_like( + ["pg_waldump", "--start", "bad"], + r"error: invalid WAL location", + "invalid start LSN", + ) + pg_bin.command_fails_like( + ["pg_waldump", "--end", "bad"], + r"error: invalid WAL location", + "invalid end LSN", + ) + pg_bin.command_like( + ["pg_waldump", "--rmgr=list"], + r"^" + _RMGR_LIST + r"$", + "rmgr list", + ) + + +def _file_checks(pg_bin, node, start_walfile, end_walfile, tmp_path): + """Range-by-file checks plus the invalid-magic-number broken-WAL check.""" + wal = os.path.join(node.datadir, "pg_wal") + pg_bin.command_fails_like( + ["pg_waldump", "foo", "bar"], + r'error: could not locate WAL file "foo"', + "start file not found", + ) + pg_bin.command_like( + ["pg_waldump", os.path.join(wal, start_walfile)], + r".", + "runs with start segment specified", + ) + pg_bin.command_fails_like( + ["pg_waldump", os.path.join(wal, start_walfile), "bar"], + r'error: could not open file "bar"', + "end file not found", + ) + pg_bin.command_like( + [ + "pg_waldump", + os.path.join(wal, start_walfile), + os.path.join(wal, end_walfile), + ], + r".", + "runs with start and end segment specified", + ) + pg_bin.command_like( + ["pg_waldump", "--quiet", "--path", wal, start_walfile], + r"^$", + "no output with --quiet option", + ) + + broken_wal_dir = tmp_path / "broken_wal" + broken_wal_dir.mkdir() + broken_wal = 
broken_wal_dir / start_walfile + shutil.copy(os.path.join(wal, start_walfile), broken_wal) + with open(broken_wal, "r+b") as fh: + fh.seek(0) + fh.write(struct.pack("= hi_lsn_bk + lo_lsn_bk, ( + "LSN stored in the file {}/{} precedes the one stored in the " + "block {}/{}".format(hi_lsn_fn, lo_lsn_fn, hi_lsn_bk, lo_lsn_bk) + ) + assert file_count > 0, "verify that at least one block has been saved" diff --git a/src/bin/pg_walsummary/Makefile b/src/bin/pg_walsummary/Makefile index 7563c243c1b71..b3d61a4ec28ad 100644 --- a/src/bin/pg_walsummary/Makefile +++ b/src/bin/pg_walsummary/Makefile @@ -44,6 +44,7 @@ clean distclean maintainer-clean: check: $(prove_check) + $(pytest_check) installcheck: $(prove_installcheck) diff --git a/src/bin/pg_walsummary/meson.build b/src/bin/pg_walsummary/meson.build index d012275402bba..8674eee8a9f5d 100644 --- a/src/bin/pg_walsummary/meson.build +++ b/src/bin/pg_walsummary/meson.build @@ -26,7 +26,13 @@ tests += { 't/001_basic.pl', 't/002_blocks.pl', ], - } + }, + 'pytest': { + 'tests': [ + 'pyt/test_001_basic.py', + 'pyt/test_002_blocks.py', + ], + }, } subdir('po', if_found: libintl) diff --git a/src/bin/pg_walsummary/pyt/test_001_basic.py b/src/bin/pg_walsummary/pyt/test_001_basic.py new file mode 100644 index 0000000000000..c19e39f1c5695 --- /dev/null +++ b/src/bin/pg_walsummary/pyt/test_001_basic.py @@ -0,0 +1,16 @@ +# Copyright (c) 2021-2026, PostgreSQL Global Development Group + +"""Port of src/bin/pg_walsummary/t/001_basic.pl.""" + + +def test_pg_walsummary_basic(pg_bin): + """pg_walsummary option handling and required-input check.""" + pg_bin.program_help_ok("pg_walsummary") + pg_bin.program_version_ok("pg_walsummary") + pg_bin.program_options_handling_ok("pg_walsummary") + + pg_bin.command_fails_like( + ["pg_walsummary"], + r"no input files specified", + "input files must be specified", + ) diff --git a/src/bin/pg_walsummary/pyt/test_002_blocks.py b/src/bin/pg_walsummary/pyt/test_002_blocks.py new file mode 100644 index 
0000000000000..fa7359c5fcd99 --- /dev/null +++ b/src/bin/pg_walsummary/pyt/test_002_blocks.py @@ -0,0 +1,85 @@ +# Copyright (c) 2021-2026, PostgreSQL Global Development Group + +"""Port of src/bin/pg_walsummary/t/002_blocks.pl.""" + +import re + + +def test_blocks(pg_bin, create_pg): + """WAL summarization produces a summary that pg_walsummary can read.""" + node1 = create_pg("node1", start=False, allows_streaming=True, has_archiving=True) + node1.append_conf("summarize_wal = on") + node1.start() + + # Create a table and insert rows, VACUUM FREEZE so autovacuum won't induce + # future modifications, then checkpoint. + node1.safe_psql( + "CREATE TABLE mytable (a int, b text);\n" + "INSERT INTO mytable\n" + "SELECT g, random()::text||random()::text||random()::text||random()::text\n" + "FROM generate_series(1, 400) g;\n" + "VACUUM FREEZE;\n" + ) + + base_lsn = node1.safe_psql("SELECT pg_current_wal_insert_lsn()") + node1.safe_psql("CHECKPOINT;") + + assert node1.poll_query_until( + "SELECT EXISTS (SELECT * from pg_available_wal_summaries() " + "WHERE end_lsn >= '{}')".format(base_lsn) + ), "WAL summarization caught up after insert" + + assert node1.poll_query_until( + "SELECT sum(reads) > 0 FROM pg_stat_io " + "WHERE backend_type = 'walsummarizer' AND object = 'wal'" + ), "WAL summarizer generated IO statistics" + + summarized_lsn = node1.safe_psql( + "SELECT MAX(end_lsn) AS summarized_lsn FROM pg_available_wal_summaries()" + ) + + # Update a row in the first block of the table and trigger a checkpoint. 
+ node1.safe_psql( + "UPDATE mytable SET b = 'abcdefghijklmnopqrstuvwxyz' || b || '01234567890'\n" + "WHERE a = 2;\n" + "CHECKPOINT;\n" + ) + + assert node1.poll_query_until( + "SELECT EXISTS (SELECT * from pg_available_wal_summaries() " + "WHERE end_lsn > '{}')".format(summarized_lsn) + ), "got new WAL summary after update" + + details = node1.safe_psql( + "SELECT tli, start_lsn, end_lsn from pg_available_wal_summaries() " + "WHERE end_lsn > '{}'".format(summarized_lsn) + ) + lines = details.split("\n") + assert len(lines) == 1, "got exactly one new WAL summary" + tli, start_lsn, end_lsn = lines[0].split("|") + + # Reconstruct the WAL summary file path. + start_hi, start_lo = start_lsn.split("/") + end_hi, end_lo = end_lsn.split("/") + filename = ( + node1.datadir + / "pg_wal" + / "summaries" + / ( + "{:0>8}{:0>8}{:0>8}{:0>8}{:0>8}.summary".format( + tli, start_hi, start_lo, end_hi, end_lo + ) + ) + ) + assert filename.is_file(), "WAL summary file exists" + + # Run pg_walsummary: we expect exactly two modified blocks, block 0 and one + # other. + result = pg_bin.result(["pg_walsummary", "-i", filename]) + lines = result.stdout.split("\n") + assert re.search( + r"(?m)FORK main: block 0$", result.stdout + ), "stdout shows block 0 modified" + assert result.stderr == "", "stderr is empty" + # stdout has a trailing newline, so splitting yields a final empty element. 
+ assert len([line for line in lines if line]) == 2, "UPDATE modified 2 blocks" diff --git a/src/bin/pgbench/Makefile b/src/bin/pgbench/Makefile index 987bf64df9de0..cf29a564e3d8a 100644 --- a/src/bin/pgbench/Makefile +++ b/src/bin/pgbench/Makefile @@ -54,6 +54,7 @@ clean distclean: check: $(prove_check) + $(pytest_check) installcheck: $(prove_installcheck) diff --git a/src/bin/pgbench/meson.build b/src/bin/pgbench/meson.build index 12e895796c12e..95d1b410eef78 100644 --- a/src/bin/pgbench/meson.build +++ b/src/bin/pgbench/meson.build @@ -45,4 +45,10 @@ tests += { 't/002_pgbench_no_server.pl', ], }, + 'pytest': { + 'tests': [ + 'pyt/test_001_pgbench_with_server.py', + 'pyt/test_002_pgbench_no_server.py', + ], + }, } diff --git a/src/bin/pgbench/pyt/test_001_pgbench_with_server.py b/src/bin/pgbench/pyt/test_001_pgbench_with_server.py new file mode 100644 index 0000000000000..52233e5627f25 --- /dev/null +++ b/src/bin/pgbench/pyt/test_001_pgbench_with_server.py @@ -0,0 +1,1546 @@ +# Copyright (c) 2021-2026, PostgreSQL Global Development Group + +"""Port of src/bin/pgbench/t/001_pgbench_with_server.pl. + +Exercises pgbench against a live server: initialization (client- and +server-side data generation, partitions, tablespaces, foreign keys), builtin +and custom scripts, deterministic seeded expressions/permute checks, extended +query parameter logging, \\gset/\\aset, pipelines, a large table of expression +and meta-command errors, log sampling, retry on serialization/deadlock errors, +and --exit-on-abort / --continue-on-error. +""" + +# The script constants below embed pgbench scripts verbatim from the Perl +# original; some are single long literals (e.g. 256 repeated arguments) that +# cannot be wrapped without changing their meaning. 
+# pylint: disable=line-too-long + +import os +import re + +_RE_EMPTY = r"^$" + + +def _check_data_state(node, kind): + """Assert the initialized pgbench tables' filler/history state (per kind).""" + assert ( + node.safe_psql( + "SELECT count(*) AS null_count FROM pgbench_accounts " + "WHERE filler IS NULL LIMIT 10;" + ) + == "0" + ), "{}: filler column of pgbench_accounts has no NULL data".format(kind) + assert ( + node.safe_psql( + "SELECT count(*) AS null_count FROM pgbench_branches " + "WHERE filler IS NULL;" + ) + == "1" + ), "{}: filler column of pgbench_branches has only NULL data".format(kind) + assert ( + node.safe_psql( + "SELECT count(*) AS null_count FROM pgbench_tellers WHERE filler IS NULL;" + ) + == "10" + ), "{}: filler column of pgbench_tellers has only NULL data".format(kind) + assert ( + node.safe_psql("SELECT count(*) AS data_count FROM pgbench_history;") == "0" + ), "{}: pgbench_history has no data".format(kind) + + +def _check_pgbench_logs(node, prefix, nb, minc, maxc, pattern): + """Validate per-thread pgbench log files (count, naming, line format).""" + bdir = node.basedir + logs = [ + str(bdir / e) + for e in os.listdir(bdir) + if re.match(r"^{}\..*$".format(re.escape(prefix)), e) + ] + assert len(logs) == nb, "number of log files" + name_re = re.compile(r"/{}\.\d+(\.\d+)?$".format(re.escape(prefix))) + assert sum(1 for log in logs if name_re.search(log)) == nb, "file name format" + rx = re.compile(pattern) + for log in sorted(logs): + with open(log, encoding="utf-8") as fh: + contents = fh.read().split("\n") + if contents and contents[-1] == "": + contents.pop() + clen = len(contents) + assert clen >= minc, "transaction count for {} ({}) above min".format(log, clen) + assert clen <= maxc, "transaction count for {} ({}) below max".format(log, clen) + assert ( + sum(1 for line in contents if rx.search(line)) == clen + ), "transaction format for {}".format(prefix) + + +def _test_init_and_basic(node, ts_name): + """Connection errors, 
initialization steps and builtin scripts.""" + labels = ",".join("'l{}'".format(i) for i in range(1, 1001)) + node.pgbench( + "--no-vacuum --client=5 --protocol=prepared --transactions=25", + 0, + [r"processed: 125/125"], + [_RE_EMPTY], + "concurrent OID generation", + { + "001_pgbench_concurrent_insert": ( + "CREATE TYPE pg_temp.e AS ENUM ({}); " + "DROP TYPE pg_temp.e;".format(labels) + ) + }, + ) + + # TODO upstream (PROC_IN_VACUUM scan breakage): tolerate failure. + node.safe_psql("CREATE TABLE ddl_target ()") + try: + node.pgbench( + "--no-vacuum --client=5 --protocol=prepared --transactions=50", + 0, + [r"processed: 250/250"], + [_RE_EMPTY], + "concurrent GRANT/VACUUM", + { + "001_pgbench_grant@9": ( + "DO $$\nBEGIN\n" + " PERFORM pg_advisory_xact_lock(42);\n" + " FOR i IN 1 .. 10 LOOP\n" + " GRANT SELECT ON ddl_target TO PUBLIC;\n" + " REVOKE SELECT ON ddl_target FROM PUBLIC;\n" + " END LOOP;\nEND\n$$;\n" + ), + "001_pgbench_vacuum_ddl_target@1": "VACUUM ddl_target;", + }, + ) + except AssertionError: + pass + + node.pgbench( + "no-such-database", + 1, + [_RE_EMPTY], + [ + r"connection to server .* failed", + r'FATAL: database "no-such-database" does not exist', + ], + "no such database", + ) + node.pgbench( + "-S -t 1", + 1, + [], + [r"Perhaps you need to do initialization"], + "run without init", + ) + node.pgbench( + "-i", + 0, + [_RE_EMPTY], + [ + r"creating tables", + r"vacuuming", + r"creating primary keys", + r"done in \d+\.\d\d s ", + ], + "pgbench scale 1 initialization", + ) + _check_data_state(node, "client-side") + + node.pgbench( + "--initialize --init-steps=dtpvg --scale=1 --unlogged-tables " + "--fillfactor=98 --foreign-keys --quiet --tablespace={ts} " + "--index-tablespace={ts} --partitions=2 " + "--partition-method=hash".format(ts=ts_name), + 0, + [r"(?i)^$"], + [ + r"dropping old tables", + r"creating tables", + r"creating 2 partitions", + r"vacuuming", + r"creating primary keys", + r"creating foreign keys", + r"(?!vacuuming)", + r"done in 
\d+\.\d\d s ", + ], + "pgbench scale 1 initialization", + ) + node.pgbench( + "--initialize --init-steps=dtpvGvv --no-vacuum --foreign-keys " + "--unlogged-tables --partitions=3", + 0, + [_RE_EMPTY], + [ + r"dropping old tables", + r"creating tables", + r"creating 3 partitions", + r"creating primary keys", + r"generating data \(server-side\)", + r"creating foreign keys", + r"(?!vacuuming)", + r"done in \d+\.\d\d s ", + ], + "pgbench --init-steps", + ) + _check_data_state(node, "server-side") + + +def _test_builtin_scripts(node): + """Run the TPC-B / simple-update / select-only builtin scripts.""" + node.pgbench( + "--transactions=5 -Dfoo=bla --client=2 --protocol=simple --builtin=t" + " --connect -n -v -n", + 0, + [ + r"builtin: TPC-B", + r"clients: 2\b", + r"processed: 10/10", + r"mode: simple", + r"maximum number of tries: 1", + ], + [_RE_EMPTY], + "pgbench tpcb-like", + ) + node.pgbench( + "--transactions=20 --client=5 -M extended --builtin=si -C --no-vacuum -s 1", + 0, + [ + r"builtin: simple update", + r"clients: 5\b", + r"threads: 1\b", + r"processed: 100/100", + r"mode: extended", + ], + [r"scale option ignored"], + "pgbench simple update", + ) + node.pgbench( + "-t 100 -c 7 -M prepared -b se --debug", + 0, + [ + r"builtin: select only", + r"clients: 7\b", + r"threads: 1\b", + r"processed: 700/700", + r"mode: prepared", + ], + [r"vacuum", r"client 0", r"client 1", r"sending", r"receiving", r"executing"], + "pgbench select only", + ) + + +def _detect_nthreads(pg_bin): + """Return 2 if pgbench supports threads on this platform, else 1.""" + result = pg_bin.result(["pgbench", "--jobs", "2", "--bad-option"]) + if "threads are not supported on this platform" in result.stderr: + return 1 + return 2 + + +def _test_custom_scripts(node, nthreads): + """Custom scripts with weights and a few simple/extended variants.""" + node.pgbench( + "-t 100 -c 1 -j {} -M prepared -n".format(nthreads), + 0, + [ + r"type: multiple scripts", + r"mode: prepared", + r"script 1: 
.*/001_pgbench_custom_script_1", + r"weight: 2", + r"script 2: .*/001_pgbench_custom_script_2", + r"weight: 1", + r"processed: 100/100", + ], + [_RE_EMPTY], + "pgbench custom scripts", + { + "001_pgbench_custom_script_1@1": ( + "-- select only\n" + "\\set aid random(1, :scale * 100000)\n" + "SELECT abalance::INTEGER AS balance\n" + " FROM pgbench_accounts\n" + " WHERE aid=:aid;\n" + ), + "001_pgbench_custom_script_2@2": ( + "-- special variables\n" + "BEGIN;\n" + "\\set foo 1\n" + "-- cast are needed for typing under -M prepared\n" + "SELECT :foo::INT + :scale::INT * :client_id::INT AS bla;\n" + "COMMIT;\n" + ), + }, + ) + for client, mode, num in (("1", "simple", "3"), ("2", "extended", "4")): + total = "10/10" if client == "1" else "20/20" + node.pgbench( + "-n -t 10 -c {} -M {}".format(client, mode), + 0, + [ + r"type: .*/001_pgbench_custom_script_{}".format(num), + r"processed: {}".format(total), + r"mode: {}".format(mode), + ], + [_RE_EMPTY], + "pgbench custom script", + { + "001_pgbench_custom_script_{}".format(num): ( + "-- select only variant\n" + "\\set aid random(1, :scale * 100000)\n" + "BEGIN;\n" + "SELECT abalance::INTEGER AS balance\n" + " FROM pgbench_accounts\n" + " WHERE aid=:aid;\n" + "COMMIT;\n" + ) + }, + ) + + +def _test_param_logging(node): + """Server-side logging of query parameters under several GUC settings.""" + long_sel = ( + "select $$'Valame Dios!' 
dijo Sancho; 'no le dije yo a vuestra merced " + "que mirase bien lo que hacia?'$$ as long \\gset\n" + ) + invalid_json_script = ( + "select '{ invalid ' as value \\gset\n" + + long_sel + + "select column1::jsonb from (values (:value), (:long)) as q;\n" + ) + div_zero_script = ( + "select '1' as one \\gset\n" + "SELECT 1 / (random() / 2)::int, :one::int, :two::int;\n" + ) + + node.append_conf( + "log_min_duration_statement = 0\n" + "log_parameter_max_length = 0\n" + "log_parameter_max_length_on_error = 0" + ) + node.reload() + node.pgbench( + "-n -t1 -c1 -M prepared", + 2, + [], + [ + r"ERROR: invalid input syntax for type json", + r"(?!unnamed portal with parameters)", + ], + "server parameter logging", + {"001_param_1": invalid_json_script}, + ) + log = node.log_content() + assert not re.search( + r"DETAIL: Parameters: \$1 = '\{ invalid ',", log + ), "no parameters logged" + + node.append_conf( + "log_parameter_max_length = -1\nlog_parameter_max_length_on_error = 64" + ) + node.reload() + node.pgbench( + "-n -t1 -c1 -M prepared", + 2, + [], + [ + r"ERROR: division by zero", + r"CONTEXT: unnamed portal with parameters: \$1 = '1', \$2 = NULL", + ], + "server parameter logging", + {"001_param_2": div_zero_script}, + ) + node.pgbench( + "-n -t1 -c1 -M prepared", + 2, + [], + [ + r"ERROR: invalid input syntax for type json", + r"CONTEXT: JSON data, line 1: \{ invalid\.\.\.[\r\n]+unnamed portal " + r"with parameters: \$1 = '\{ invalid ', \$2 = '''Valame Dios!'' dijo " + r"Sancho; ''no le dije yo a vuestra merced que \.\.\.'", + ], + "server parameter logging", + {"001_param_3": invalid_json_script}, + ) + log = node.log_content() + assert re.search( + r"DETAIL: Parameters: \$1 = '\{ invalid ', \$2 = '''Valame Dios!'' " + r"dijo Sancho; ''no le dije yo a vuestra merced que mirase bien lo que " + r"hacia\?'''", + log, + ), "parameter report does not truncate" + + node.append_conf( + "log_min_duration_statement = -1\n" + "log_parameter_max_length = 7\n" + 
"log_parameter_max_length_on_error = -1" + ) + node.reload() + node.pgbench( + "-n -t1 -c1 -M prepared", + 2, + [], + [ + r"ERROR: division by zero", + r"CONTEXT: unnamed portal with parameters: \$1 = '1', \$2 = NULL", + ], + "server parameter logging", + {"001_param_4": div_zero_script}, + ) + node.append_conf("log_min_duration_statement = 0") + node.reload() + node.pgbench( + "-n -t1 -c1 -M prepared", + 2, + [], + [ + r"ERROR: invalid input syntax for type json", + r"CONTEXT: JSON data, line 1: \{ invalid\.\.\.[\r\n]+unnamed portal " + r"with parameters: \$1 = '\{ invalid ', \$2 = '''Valame Dios!'' dijo " + r"Sancho; ''no le dije yo a vuestra merced que mirase bien lo que " + r"hacia\?'", + ], + "server parameter logging", + {"001_param_5": invalid_json_script}, + ) + log = node.log_content() + assert re.search( + r"DETAIL: Parameters: \$1 = '\{ inval\.\.\.', \$2 = '''Valame\.\.\.'", log + ), "parameter report truncates" + + node.pgbench( + "-n -t1 -c1 -M prepared", + 2, + [], + [ + r'ERROR: invalid input syntax for type smallint: "1a"', + r"CONTEXT: unnamed portal parameter \$2 = '1a'", + ], + "server parameter logging", + { + "001_param_6": ( + "select 42 as value1, '1a' as value2 \\gset\n" + "select :value1::smallint, :value2::smallint;\n" + ) + }, + ) + node.append_conf( + "log_min_duration_statement = -1\n" + "log_parameter_max_length_on_error = 0\n" + "log_parameter_max_length = -1" + ) + node.reload() + + +def _test_seeded_random_determinism(node): + """A seeded run produces identical random values across two invocations.""" + node.safe_psql( + "CREATE UNLOGGED TABLE seeded_random(seed INT8 NOT NULL, " + "rand TEXT NOT NULL, val INTEGER NOT NULL);" + ) + seed = 123456789 + for i in (1, 2): + node.pgbench( + "--random-seed={} -t 1".format(seed), + 0, + [r"processed: 1/1"], + [r"setting random seed to {}\b".format(seed)], + "random seeded with {}".format(seed), + { + "001_pgbench_random_seed_{}".format(i): ( + "-- test random functions\n" + "\\set ur 
random(1000, 1999)\n" + "\\set er random_exponential(2000, 2999, 2.0)\n" + "\\set gr random_gaussian(3000, 3999, 3.0)\n" + "\\set zr random_zipfian(4000, 4999, 1.5)\n" + "INSERT INTO seeded_random(seed, rand, val) VALUES\n" + " (:random_seed, 'uniform', :ur),\n" + " (:random_seed, 'exponential', :er),\n" + " (:random_seed, 'gaussian', :gr),\n" + " (:random_seed, 'zipfian', :zr);\n" + ) + }, + ) + result = node.psql_capture( + "SELECT seed, rand, val, COUNT(*) FROM seeded_random " + "GROUP BY seed, rand, val" + ) + assert result.rc == 0, "psql seeded_random count ok" + assert result.stderr == "", "psql seeded_random count stderr is empty" + for kind, lead in ( + ("uniform", "1"), + ("exponential", "2"), + ("gaussian", "3"), + ("zipfian", "4"), + ): + assert re.search( + r"\b{}\|{}\|{}\d\d\d\|2".format(seed, kind, lead), result.stdout + ), "psql seeded_random count {}".format(kind) + node.safe_psql("DROP TABLE seeded_random;") + + +_NESTED_IF_SCRIPT = "\n\t\t\t\\if false\n\t\t\t\tSELECT 1 / 0;\n\t\t\t\t\\if true\n\t\t\t\t\tSELECT 1 / 0;\n\t\t\t\t\\elif true\n\t\t\t\t\tSELECT 1 / 0;\n\t\t\t\t\\else\n\t\t\t\t\tSELECT 1 / 0;\n\t\t\t\t\\endif\n\t\t\t\tSELECT 1 / 0;\n\t\t\t\\elif false\n\t\t\t\t\\if true\n\t\t\t\t\tSELECT 1 / 0;\n\t\t\t\t\\elif true\n\t\t\t\t\tSELECT 1 / 0;\n\t\t\t\t\\else\n\t\t\t\t\tSELECT 1 / 0;\n\t\t\t\t\\endif\n\t\t\t\\else\n\t\t\t\t\\if false\n\t\t\t\t\tSELECT 1 / 0;\n\t\t\t\t\\elif false\n\t\t\t\t\tSELECT 1 / 0;\n\t\t\t\t\\else\n\t\t\t\t\tSELECT 'correct';\n\t\t\t\t\\endif\n\t\t\t\\endif\n\t\t\t\\if true\n\t\t\t\tSELECT 'correct';\n\t\t\t\\else\n\t\t\t\t\\if true\n\t\t\t\t\tSELECT 1 / 0;\n\t\t\t\t\\elif true\n\t\t\t\t\tSELECT 1 / 0;\n\t\t\t\t\\else\n\t\t\t\t\tSELECT 1 / 0;\n\t\t\t\t\\endif\n\t\t\t\\endif\n\t\t" +_BACKSLASH_SCRIPT = "-- run set\n\\set zero 0\n\\set one 1.0\n-- sleep\n\\sleep :one ms\n\\sleep 100 us\n\\sleep 0 s\n\\sleep :zero\n-- setshell and continuation\n\\setshell another_one\\\n echo \\\n :one\n\\set n debug(:another_one)\n-- 
shell\n\\shell echo shell-echo-output\n" +_GSET_SCRIPT = "-- test gset\n-- no columns\nSELECT \\gset\n-- one value\nSELECT 0 AS i0 \\gset\n\\set i debug(:i0)\n-- two values\nSELECT 1 AS i1, 2 AS i2 \\gset\n\\set i debug(:i1)\n\\set i debug(:i2)\n-- with prefix\nSELECT 3 AS i3 \\gset x_\n\\set i debug(:x_i3)\n-- overwrite existing variable\nSELECT 0 AS i4, 4 AS i4 \\gset\n\\set i debug(:i4)\n-- work on the last SQL command under \\;\n\\; \\; SELECT 0 AS i5 \\; SELECT 5 AS i5 \\; \\; \\gset\n\\set i debug(:i5)\n" +_ASET_SCRIPT = "\n-- test aset, which applies to a combined query\n\\; SELECT 6 AS i6 \\; SELECT 7 AS i7 \\; \\aset\n-- unless it returns more than one row, last is kept\nSELECT 8 AS i6 UNION SELECT 9 ORDER BY 1 DESC \\aset\n\\set i debug(:i6)\n\\set i debug(:i7)\n" +_SERIALIZATION_SCRIPT = "\n-- What's happening:\n-- The first client starts the transaction with the isolation level Repeatable\n-- Read:\n--\n-- BEGIN;\n-- UPDATE xy SET y = ... WHERE x = 1;\n--\n-- The second client starts a similar transaction with the same isolation level:\n--\n-- BEGIN;\n-- UPDATE xy SET y = ... WHERE x = 1;\n-- \n--\n-- The first client commits its transaction, and the second client gets a\n-- serialization error.\n\n\\set delta random(-5000, 5000)\n\n-- The second client will stop here\nSELECT pg_advisory_lock(0);\n\n-- Start transaction with concurrent update\nBEGIN;\nUPDATE xy SET y = y + :delta WHERE x = 1 AND pg_advisory_lock(1) IS NOT NULL;\n\n-- Wait for the second client\nDO $$\nDECLARE\n exists boolean;\n waiters integer;\nBEGIN\n -- The second client always comes in second, and the number of rows in the\n -- table first_client_table reflect this. 
Here the first client inserts a row,\n -- so the second client will see a non-empty table when repeating the\n -- transaction after the serialization error.\n SELECT EXISTS (SELECT * FROM first_client_table) INTO STRICT exists;\n IF NOT exists THEN\n\t-- Let the second client begin\n\tPERFORM pg_advisory_unlock(0);\n\t-- And wait until the second client tries to get the same lock\n\tLOOP\n\t SELECT COUNT(*) INTO STRICT waiters FROM pg_locks WHERE\n\t locktype = 'advisory' AND objsubid = 1 AND\n\t ((classid::bigint << 32) | objid::bigint = 1::bigint) AND NOT granted;\n\t IF waiters = 1 THEN\n\t\tINSERT INTO first_client_table VALUES (1);\n\n\t\t-- Exit loop\n\t\tEXIT;\n\t END IF;\n\tEND LOOP;\n END IF;\nEND$$;\n\nCOMMIT;\nSELECT pg_advisory_unlock_all();\n" +_DEADLOCK_SCRIPT = "\n-- What's happening:\n-- The first client gets the lock 2.\n-- The second client gets the lock 3 and tries to get the lock 2.\n-- The first client tries to get the lock 3 and one of them gets a deadlock\n-- error.\n--\n-- A client that does not get a deadlock error must hold a lock at the\n-- transaction start. Thus in the end it releases all of its locks before the\n-- client with the deadlock error starts a retry (we do not want any errors\n-- again).\n\n-- Since the client with the deadlock error has not released the blocking locks,\n-- let's do this here.\nSELECT pg_advisory_unlock_all();\n\n-- The second client and the client with the deadlock error stop here\nSELECT pg_advisory_lock(0);\nSELECT pg_advisory_lock(1);\n\n-- The second client and the client with the deadlock error always come after\n-- the first and the number of rows in the table first_client_table reflects\n-- this. 
Here the first client inserts a row, so in the future the table is\n-- always non-empty.\nDO $$\nDECLARE\n exists boolean;\nBEGIN\n SELECT EXISTS (SELECT * FROM first_client_table) INTO STRICT exists;\n IF exists THEN\n\t-- We are the second client or the client with the deadlock error\n\n\t-- The first client will take care by itself of this lock (see below)\n\tPERFORM pg_advisory_unlock(0);\n\n\tPERFORM pg_advisory_lock(3);\n\n\t-- The second client can get a deadlock here\n\tPERFORM pg_advisory_lock(2);\n ELSE\n\t-- We are the first client\n\n\t-- This code should not be used in a new transaction after an error\n\tINSERT INTO first_client_table VALUES (1);\n\n\tPERFORM pg_advisory_lock(2);\n END IF;\nEND$$;\n\nDO $$\nDECLARE\n num_rows integer;\n waiters integer;\nBEGIN\n -- Check if we are the first client\n SELECT COUNT(*) FROM first_client_table INTO STRICT num_rows;\n IF num_rows = 1 THEN\n\t-- This code should not be used in a new transaction after an error\n\tINSERT INTO first_client_table VALUES (2);\n\n\t-- Let the second client begin\n\tPERFORM pg_advisory_unlock(0);\n\tPERFORM pg_advisory_unlock(1);\n\n\t-- Make sure the second client is ready for deadlock\n\tLOOP\n\t SELECT COUNT(*) INTO STRICT waiters FROM pg_locks WHERE\n\t locktype = 'advisory' AND\n\t objsubid = 1 AND\n\t ((classid::bigint << 32) | objid::bigint = 2::bigint) AND\n\t NOT granted;\n\n\t IF waiters = 1 THEN\n\t -- Exit loop\n\t\tEXIT;\n\t END IF;\n\tEND LOOP;\n\n\tPERFORM pg_advisory_lock(0);\n -- And the second client took care by itself of the lock 1\n END IF;\nEND$$;\n\n-- The first client can get a deadlock here\nSELECT pg_advisory_lock(3);\n\nSELECT pg_advisory_unlock_all();\n" +_PIPELINE_SYNC_SCRIPT = "\n-- test startpipeline\n\\startpipeline\nselect 1;\n\\syncpipeline\n\\syncpipeline\nselect 2;\n\\syncpipeline\nselect 3;\n\\endpipeline\n" + + +_EXPRESSIONS_SCRIPT = """\ +-- integer functions +\\set i1 debug(random(10, 19)) +\\set i2 debug(random_exponential(100, 199, 10.0)) 
+\\set i3 debug(random_gaussian(1000, 1999, 10.0)) +\\set i4 debug(abs(-4)) +\\set i5 debug(greatest(5, 4, 3, 2)) +\\set i6 debug(11 + least(-5, -4, -3, -2)) +\\set i7 debug(int(7.3)) +-- integer arithmetic and bit-wise operators +\\set i8 debug(17 / (4|1) + ( 4 + (7 >> 2))) +\\set i9 debug(- (3 * 4 - (-(~ 1) + -(~ 0))) / -1 + 3 % -1) +\\set ia debug(10 + (0 + 0 * 0 - 0 / 1)) +\\set ib debug(:ia + :scale) +\\set ic debug(64 % (((2 + 1 * 2 + (1 # 2) | 4 * (2 & 11)) - (1 << 2)) + 2)) +-- double functions and operators +\\set d1 debug(sqrt(+1.5 * 2.0) * abs(-0.8E1)) +\\set d2 debug(double(1 + 1) * (-75.0 / :foo)) +\\set pi debug(pi() * 4.9) +\\set d4 debug(greatest(4, 2, -1.17) * 4.0 * Ln(Exp(1.0))) +\\set d5 debug(least(-5.18, .0E0, 1.0/0) * -3.3) +-- reset variables +\\set i1 0 +\\set d1 false +-- yet another integer function +\\set id debug(random_zipfian(1, 9, 1.3)) +--- pow and power +\\set poweri debug(pow(-3,3)) +\\set powerd debug(pow(2.0,10)) +\\set poweriz debug(pow(0,0)) +\\set powerdz debug(pow(0.0,0.0)) +\\set powernegi debug(pow(-2,-3)) +\\set powernegd debug(pow(-2.0,-3.0)) +\\set powernegd2 debug(power(-5.0,-5.0)) +\\set powerov debug(pow(9223372036854775807, 2)) +\\set powerov2 debug(pow(10,30)) +-- comparisons and logical operations +\\set c0 debug(1.0 = 0.0 and 1.0 != 0.0) +\\set c1 debug(0 = 1 Or 1.0 = 1) +\\set c4 debug(case when 0 < 1 then 32 else 0 end) +\\set c5 debug(case when true then 33 else 0 end) +\\set c6 debug(case when false THEN -1 when 1 = 1 then 13 + 19 + 2.0 end ) +\\set c7 debug(case when (1 > 0) and (1 >= 0) and (0 < 1) and (0 <= 1) and (0 != 1) and (0 = 0) and (0 <> 1) then 35 else 0 end) +\\set c8 debug(CASE \\ + WHEN (1.0 > 0.0) AND (1.0 >= 0.0) AND (0.0 < 1.0) AND (0.0 <= 1.0) AND \\ + (0.0 != 1.0) AND (0.0 = 0.0) AND (0.0 <> 1.0) AND (0.0 = 0.0) \\ + THEN 36 \\ + ELSE 0 \\ + END) +\\set c9 debug(CASE WHEN NOT FALSE THEN 3 * 12.3333334 END) +\\set ca debug(case when false then 0 when 1-1 <> 0 then 1 else 38 end) +\\set cb 
debug(10 + mod(13 * 7 + 12, 13) - mod(-19 * 11 - 17, 19)) +\\set cc debug(NOT (0 > 1) AND (1 <= 1) AND NOT (0 >= 1) AND (0 < 1) AND \\ + NOT (false and true) AND (false OR TRUE) AND (NOT :f) AND (NOT FALSE) AND \\ + NOT (NOT TRUE)) +-- NULL value and associated operators +\\set n0 debug(NULL + NULL * exp(NULL)) +\\set n1 debug(:n0) +\\set n2 debug(NOT (:n0 IS NOT NULL OR :d1 IS NULL)) +\\set n3 debug(:n0 IS NULL AND :d1 IS NOT NULL AND :d1 NOTNULL) +\\set n4 debug(:n0 ISNULL AND NOT :n0 IS TRUE AND :n0 IS NOT FALSE) +\\set n5 debug(CASE WHEN :n IS NULL THEN 46 ELSE NULL END) +-- use a variables of all types +\\set n6 debug(:n IS NULL AND NOT :f AND :t) +-- conditional truth +\\set cs debug(CASE WHEN 1 THEN TRUE END AND CASE WHEN 1.0 THEN TRUE END AND CASE WHEN :n THEN NULL ELSE TRUE END) +-- hash functions +\\set h0 debug(hash(10, 5432)) +\\set h1 debug(:h0 = hash_murmur2(10, 5432)) +\\set h3 debug(hash_fnv1a(10, 5432)) +\\set h4 debug(hash(10)) +\\set h5 debug(hash(10) = hash(10, :default_seed)) +-- lazy evaluation +\\set zy 0 +\\set yz debug(case when :zy = 0 then -1 else (1 / :zy) end) +\\set yz debug(case when :zy = 0 or (1 / :zy) < 0 then -1 else (1 / :zy) end) +\\set yz debug(case when :zy > 0 and (1 / :zy) < 0 then (1 / :zy) else 1 end) +-- substitute variables of all possible types +\\set v0 NULL +\\set v1 TRUE +\\set v2 5432 +\\set v3 -54.21E-2 +SELECT :v0, :v1, :v2, :v3; +-- if tests +\\set nope 0 +\\if 1 > 0 +\\set id debug(65) +\\elif 0 +\\set nope 1 +\\else +\\set nope 1 +\\endif +\\if 1 < 0 +\\set nope 1 +\\elif 1 > 0 +\\set ie debug(74) +\\else +\\set nope 1 +\\endif +\\if 1 < 0 +\\set nope 1 +\\elif 1 < 0 +\\set nope 1 +\\else +\\set if debug(83) +\\endif +\\if 1 = 1 +\\set ig debug(86) +\\elif 0 +\\set nope 1 +\\endif +\\if 1 = 0 +\\set nope 1 +\\elif 1 <> 0 +\\set ih debug(93) +\\endif +-- must be zero if false branches where skipped +\\set nope debug(:nope) +-- check automatic variables +\\set sc debug(:scale) +\\set ci debug(:client_id) +\\set 
rs debug(:random_seed) +-- minint constant parsing +\\set min debug(-9223372036854775808) +\\set max debug(-(:min + 1)) +-- parametric pseudorandom permutation function +\\set t debug(permute(0, 2) + permute(1, 2) = 1) +\\set t debug(permute(0, 3) + permute(1, 3) + permute(2, 3) = 3) +\\set t debug(permute(0, 4) + permute(1, 4) + permute(2, 4) + permute(3, 4) = 6) +\\set t debug(permute(0, 5) + permute(1, 5) + permute(2, 5) + permute(3, 5) + permute(4, 5) = 10) +\\set t debug(permute(0, 16) + permute(1, 16) + permute(2, 16) + permute(3, 16) + \\ + permute(4, 16) + permute(5, 16) + permute(6, 16) + permute(7, 16) + \\ + permute(8, 16) + permute(9, 16) + permute(10, 16) + permute(11, 16) + \\ + permute(12, 16) + permute(13, 16) + permute(14, 16) + permute(15, 16) = 120) +-- random sanity checks +\\set size random(2, 1000) +\\set v random(0, :size - 1) +\\set p permute(:v, :size) +\\set t debug(0 <= :p and :p < :size and :p = permute(:v + :size, :size) and :p <> permute(:v + 1, :size)) +-- actual values +\\set t debug(permute(:v, 1) = 0) +\\set t debug(permute(0, 2, 5431) = 0 and permute(1, 2, 5431) = 1 and \\ + permute(0, 2, 5433) = 1 and permute(1, 2, 5433) = 0) +-- check permute's portability across architectures +\\set size debug(:max - 10) +\\set t debug(permute(:size-1, :size, 5432) = 520382784483822430 and \\ + permute(:size-2, :size, 5432) = 1143715004660802862 and \\ + permute(:size-3, :size, 5432) = 447293596416496998 and \\ + permute(:size-4, :size, 5432) = 916527772266572956 and \\ + permute(:size-5, :size, 5432) = 2763809008686028849 and \\ + permute(:size-6, :size, 5432) = 8648551549198294572 and \\ + permute(:size-7, :size, 5432) = 4542876852200565125)""" + + +_EXPRESSIONS_EXPECTED = [ + r"setting random seed to 5432\b", + r"command=1.: int 17\b", + r"command=2.: int 104\b", + r"command=3.: int 1498\b", + r"command=4.: int 4\b", + r"command=5.: int 5\b", + r"command=6.: int 6\b", + r"command=7.: int 7\b", + r"command=8.: int 8\b", + r"command=9.: int 
9\b", + r"command=10.: int 10\b", + r"command=11.: int 11\b", + r"command=12.: int 12\b", + r"command=15.: double 15\b", + r"command=16.: double 16\b", + r"command=17.: double 17\b", + r"command=20.: int 3\b", + r"command=21.: double -27\b", + r"command=22.: double 1024\b", + r"command=23.: double 1\b", + r"command=24.: double 1\b", + r"command=25.: double -0.125\b", + r"command=26.: double -0.125\b", + r"command=27.: double -0.00032\b", + r"command=28.: double 8.50705917302346e\+0?37\b", + r"command=29.: double 1e\+0?30\b", + r"command=30.: boolean false\b", + r"command=31.: boolean true\b", + r"command=32.: int 32\b", + r"command=33.: int 33\b", + r"command=34.: double 34\b", + r"command=35.: int 35\b", + r"command=36.: int 36\b", + r"command=37.: double 37\b", + r"command=38.: int 38\b", + r"command=39.: int 39\b", + r"command=40.: boolean true\b", + r"command=41.: null\b", + r"command=42.: null\b", + r"command=43.: boolean true\b", + r"command=44.: boolean true\b", + r"command=45.: boolean true\b", + r"command=46.: int 46\b", + r"command=47.: boolean true\b", + r"command=48.: boolean true\b", + r"command=49.: int -5817877081768721676\b", + r"command=50.: boolean true\b", + r"command=51.: int -7793829335365542153\b", + r"command=52.: int -?\d+\b", + r"command=53.: boolean true\b", + r"command=65.: int 65\b", + r"command=74.: int 74\b", + r"command=83.: int 83\b", + r"command=86.: int 86\b", + r"command=93.: int 93\b", + r"command=95.: int 0\b", + r"command=96.: int 1\b", + r"command=97.: int 0\b", + r"command=98.: int 5432\b", + r"command=99.: int -9223372036854775808\b", + r"command=100.: int 9223372036854775807\b", + r"command=101.: boolean true\b", + r"command=102.: boolean true\b", + r"command=103.: boolean true\b", + r"command=104.: boolean true\b", + r"command=105.: boolean true\b", + r"command=109.: boolean true\b", + r"command=110.: boolean true\b", + r"command=111.: boolean true\b", + r"command=113.: boolean true\b", +] + + +_ERRORS = [ + ( + "sql 
syntax error", + 2, + [r"ERROR: syntax error", r"prepared statement .* does not exist"], + "-- SQL syntax error\n SELECT 1 + ;\n", + ), + ( + "sql too many args", + 1, + [r"statement has too many arguments.*\b255\b"], + "-- MAX_ARGS=256 for prepared\n\\set i 0\nSELECT LEAST(:i, :i, :i, :i, :i, :i, :i, :i, :i, :i, :i, :i, :i, :i, :i, :i, :i, :i, :i, :i, :i, :i, :i, :i, :i, :i, :i, :i, :i, :i, :i, :i, :i, :i, :i, :i, :i, :i, :i, :i, :i, :i, :i, :i, :i, :i, :i, :i, :i, :i, :i, :i, :i, :i, :i, :i, :i, :i, :i, :i, :i, :i, :i, :i, :i, :i, :i, :i, :i, :i, :i, :i, :i, :i, :i, :i, :i, :i, :i, :i, :i, :i, :i, :i, :i, :i, :i, :i, :i, :i, :i, :i, :i, :i, :i, :i, :i, :i, :i, :i, :i, :i, :i, :i, :i, :i, :i, :i, :i, :i, :i, :i, :i, :i, :i, :i, :i, :i, :i, :i, :i, :i, :i, :i, :i, :i, :i, :i, :i, :i, :i, :i, :i, :i, :i, :i, :i, :i, :i, :i, :i, :i, :i, :i, :i, :i, :i, :i, :i, :i, :i, :i, :i, :i, :i, :i, :i, :i, :i, :i, :i, :i, :i, :i, :i, :i, :i, :i, :i, :i, :i, :i, :i, :i, :i, :i, :i, :i, :i, :i, :i, :i, :i, :i, :i, :i, :i, :i, :i, :i, :i, :i, :i, :i, :i, :i, :i, :i, :i, :i, :i, :i, :i, :i, :i, :i, :i, :i, :i, :i, :i, :i, :i, :i, :i, :i, :i, :i, :i, :i, :i, :i, :i, :i, :i, :i, :i, :i, :i, :i, :i, :i, :i, :i, :i, :i, :i, :i, :i, :i, :i, :i, :i, :i, :i, :i, :i, :i, :i, :i, :i, :i, :i, :i, :i, :i)", + ), + ( + "shell bad command", + 2, + [r"\(shell\) .* meta-command failed"], + "\\shell no-such-command", + ), + ( + "shell undefined variable", + 2, + [r'undefined variable ":nosuchvariable"'], + "-- undefined variable in shell\n\\shell echo ::foo :nosuchvariable\n", + ), + ( + "shell missing command", + 1, + [r"missing command "], + "\\shell", + ), + ( + "shell too many args", + 1, + [r'too many arguments in command "shell"'], + "-- 256 arguments to \\shell\n\\shell echo arg arg arg arg arg arg arg arg arg arg arg arg arg arg arg arg arg arg arg arg arg arg arg arg arg arg arg arg arg arg arg arg arg arg arg arg arg arg arg arg arg arg arg arg arg arg arg arg arg arg arg arg arg arg arg 
arg arg arg arg arg arg arg arg arg arg arg arg arg arg arg arg arg arg arg arg arg arg arg arg arg arg arg arg arg arg arg arg arg arg arg arg arg arg arg arg arg arg arg arg arg arg arg arg arg arg arg arg arg arg arg arg arg arg arg arg arg arg arg arg arg arg arg arg arg arg arg arg arg arg arg arg arg arg arg arg arg arg arg arg arg arg arg arg arg arg arg arg arg arg arg arg arg arg arg arg arg arg arg arg arg arg arg arg arg arg arg arg arg arg arg arg arg arg arg arg arg arg arg arg arg arg arg arg arg arg arg arg arg arg arg arg arg arg arg arg arg arg arg arg arg arg arg arg arg arg arg arg arg arg arg arg arg arg arg arg arg arg arg arg arg arg arg arg arg arg arg arg arg arg arg arg arg arg arg arg arg arg arg arg arg arg arg arg arg arg arg arg arg arg arg arg arg arg arg arg", + ), + ( + "set syntax error", + 1, + [r'syntax error in command "set"'], + "\\set i 1 +", + ), + ( + "set no such function", + 1, + [r"unexpected function name"], + "\\set i noSuchFunction()", + ), + ( + "set invalid variable name", + 2, + [r"invalid variable name"], + "\\set . 
1", + ), + ( + "set division by zero", + 2, + [r"division by zero"], + "\\set i 1/0", + ), + ( + "set undefined variable", + 2, + [r'undefined variable "nosuchvariable"'], + "\\set i :nosuchvariable", + ), + ( + "set unexpected char", + 1, + [r"unexpected character .;."], + "\\set i ;", + ), + ( + "set too many args", + 2, + [r"too many function arguments"], + "\\set i least(0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16)", + ), + ( + "set empty random range", + 2, + [r"empty range given to random"], + "\\set i random(5,3)", + ), + ( + "set random range too large", + 2, + [r"random range is too large"], + "\\set i random(:minint, :maxint)", + ), + ( + "set gaussian param too small", + 2, + [r"gaussian param.* at least 2"], + "\\set i random_gaussian(0, 10, 1.0)", + ), + ( + "set exponential param greater 0", + 2, + [r"exponential parameter must be greater "], + "\\set i random_exponential(0, 10, 0.0)", + ), + ( + "set zipfian param to 1", + 2, + [r"zipfian parameter must be in range \[1\.001, 1000\]"], + "\\set i random_zipfian(0, 10, 1)", + ), + ( + "set zipfian param too large", + 2, + [r"zipfian parameter must be in range \[1\.001, 1000\]"], + "\\set i random_zipfian(0, 10, 1000000)", + ), + ( + "set non numeric value", + 2, + [r'malformed variable "foo" value: "bla"'], + "\\set i :foo + 1", + ), + ( + "set no expression", + 1, + [r"syntax error"], + "\\set i", + ), + ( + "set missing argument", + 1, + [r"missing argument"], + "\\set", + ), + ( + "set not a bool", + 2, + [r"cannot coerce double to boolean"], + "\\set b NOT 0.0", + ), + ( + "set not an int", + 2, + [r"cannot coerce boolean to int"], + "\\set i TRUE + 2", + ), + ( + "set not a double", + 2, + [r"cannot coerce boolean to double"], + "\\set d ln(TRUE)", + ), + ( + "set case error", + 1, + [r'syntax error in command "set"'], + "\\set i CASE TRUE THEN 1 ELSE 0 END", + ), + ( + "set random error", + 2, + [r"cannot coerce boolean to int"], + "\\set b random(FALSE, TRUE)", + ), + ( + "set number of args 
mismatch", + 1, + [r"unexpected number of arguments"], + "\\set d ln(1.0, 2.0))", + ), + ( + "set at least one arg", + 1, + [r"at least one argument expected"], + "\\set i greatest())", + ), + ( + "set double to int overflow", + 2, + [r"double to int overflow for 100"], + "\\set i int(1E32)", + ), + ( + "set bigint add overflow", + 2, + [r"int add out"], + "\\set i (1<<62) + (1<<62)", + ), + ( + "set bigint sub overflow", + 2, + [r"int sub out"], + "\\set i 0 - (1<<62) - (1<<62) - (1<<62)", + ), + ( + "set bigint mul overflow", + 2, + [r"int mul out"], + "\\set i 2 * (1<<62)", + ), + ( + "set bigint div out of range", + 2, + [r"bigint div out of range"], + "\\set i :minint / -1", + ), + ( + "setshell not an int", + 2, + [r"command must return an integer"], + "\\setshell i echo -n one", + ), + ( + "setshell missing arg", + 1, + [r"missing argument "], + "\\setshell var", + ), + ( + "setshell no such command", + 2, + [r"could not read result "], + "\\setshell var no-such-command", + ), + ( + "sleep undefined variable", + 2, + [r"sleep: undefined variable"], + "\\sleep :nosuchvariable", + ), + ( + "sleep too many args", + 1, + [r"too many arguments"], + "\\sleep too many args", + ), + ( + "sleep missing arg", + 1, + [r"missing argument", r"\\sleep"], + "\\sleep", + ), + ( + "sleep unknown unit", + 1, + [r"unrecognized time unit"], + "\\sleep 1 week", + ), + ( + "misc invalid backslash command", + 1, + [r'invalid command .* "nosuchcommand"'], + "\\nosuchcommand", + ), + ( + "misc empty script", + 1, + [r"empty command list for script"], + "", + ), + ( + "bad boolean", + 2, + [r"malformed variable.*trueXXX"], + "\\set b :badtrue or true", + ), + ( + "invalid permute size", + 2, + [r"permute size parameter must be greater than zero"], + "\\set i permute(0, 0)", + ), + ( + "gset no row", + 2, + [r"expected one row, got 0\b"], + "SELECT WHERE FALSE \\gset", + ), + ( + "gset alone", + 1, + [r"gset must follow an SQL command"], + "\\gset", + ), + ( + "gset no SQL", + 1, + 
[r"gset must follow an SQL command"], + "\\set i +1\n\\gset", + ), + ( + "gset too many arguments", + 1, + [r"too many arguments"], + "SELECT 1 \\gset a b", + ), + ( + "gset after gset", + 1, + [r"gset must follow an SQL command"], + "SELECT 1 AS i \\gset\n\\gset", + ), + ( + "gset non SELECT", + 2, + [r"expected one row, got 0"], + "DROP TABLE IF EXISTS no_such_table \\gset", + ), + ( + "gset bad default name", + 2, + [r"error storing into variable \?column\?"], + "SELECT 1 \\gset", + ), + ( + "gset bad name", + 2, + [r"error storing into variable bad name!"], + 'SELECT 1 AS "bad name!" \\gset', + ), +] + + +def _test_expressions(node): + """Deterministic seeded expression and permute checks.""" + node.pgbench( + "--random-seed=5432 -t 1 -Dfoo=-10.1 -Dbla=false -Di=+3 -Dn=null " + "-Dt=t -Df=of -Dd=1.0", + 0, + [r"type: .*/001_pgbench_expressions", r"processed: 1/1"], + _EXPRESSIONS_EXPECTED, + "pgbench expressions", + {"001_pgbench_expressions": _EXPRESSIONS_SCRIPT}, + ) + + +def _test_nested_ifs(node): + """Nested \\if/\\elif/\\else constructs select the right branch.""" + node.pgbench( + "--no-vacuum --client=1 --exit-on-abort --transactions=1", + 0, + [r"actually processed"], + [_RE_EMPTY], + "nested ifs", + {"pgbench_nested_if": _NESTED_IF_SCRIPT}, + ) + + +def _test_backslash_commands(node): + """\\set, \\sleep, \\setshell continuation, \\shell.""" + node.pgbench( + "-t 1", + 0, + [ + r"type: .*/001_pgbench_backslash_commands", + r"processed: 1/1", + r"shell-echo-output", + ], + [r"command=8.: int 1\b"], + "pgbench backslash commands", + {"001_pgbench_backslash_commands": _BACKSLASH_SCRIPT}, + ) + + +def _test_gset_aset(node): + """\\gset and \\aset behavior, including their error cases.""" + node.pgbench( + "-t 1", + 0, + [r"type: .*/001_pgbench_gset", r"processed: 1/1"], + [ + r"command=3.: int 0\b", + r"command=5.: int 1\b", + r"command=6.: int 2\b", + r"command=8.: int 3\b", + r"command=10.: int 4\b", + r"command=12.: int 5\b", + ], + "pgbench gset 
command", + {"001_pgbench_gset": _GSET_SCRIPT}, + ) + node.pgbench( + "-t 1", + 2, + [r"type: .*/001_pgbench_gset_two_rows", r"processed: 0/1"], + [r"expected one row, got 2\b"], + "pgbench gset command with two rows", + { + "001_pgbench_gset_two_rows": ( + "\nSELECT 5432 AS fail UNION SELECT 5433 ORDER BY 1 \\gset\n" + ) + }, + ) + node.pgbench( + "-t 1", + 0, + [r"type: .*/001_pgbench_aset", r"processed: 1/1"], + [r"command=3.: int 8\b", r"command=4.: int 7\b"], + "pgbench aset command", + {"001_pgbench_aset": _ASET_SCRIPT}, + ) + node.pgbench( + "-t 1", + 2, + [r"type: .*/001_pgbench_aset_empty", r"processed: 0/1"], + [r"undefined variable \"i8\"", r"evaluation of meta-command failed\b"], + "pgbench aset command with empty result", + { + "001_pgbench_aset_empty": ( + "\n-- empty result\n\\; SELECT 5432 AS i8 WHERE FALSE \\; \\aset\n" + "\\set i debug(:i8)\n" + ) + }, + ) + + +def _test_pipelines(node): + """\\startpipeline / \\syncpipeline / \\endpipeline and their errors.""" + select_ten = "select 1;\n" * 10 + node.pgbench( + "-t 1 -n -M extended", + 0, + [r"type: .*/001_pgbench_pipeline", r"actually processed: 1/1"], + [], + "working \\startpipeline", + { + "001_pgbench_pipeline": "\n-- test startpipeline\n\\startpipeline\n" + + select_ten + + "\n\\endpipeline\n" + }, + ) + node.pgbench( + "-t 1 -n -M extended", + 0, + [r"type: .*/001_pgbench_pipeline_sync", r"actually processed: 1/1"], + [], + "working \\startpipeline with \\syncpipeline", + {"001_pgbench_pipeline_sync": _PIPELINE_SYNC_SCRIPT}, + ) + node.pgbench( + "-t 1 -n -M prepared", + 0, + [r"type: .*/001_pgbench_pipeline_prep", r"actually processed: 1/1"], + [], + "working \\startpipeline", + { + "001_pgbench_pipeline_prep": "\n-- test startpipeline\n\\startpipeline\n" + "\\endpipeline\n\\startpipeline\n" + select_ten + "\n\\endpipeline\n" + }, + ) + _test_pipeline_errors(node) + _test_pipeline_implicit_xact(node) + node.pgbench( + "-c4 -t 10 -n -M prepared", + 0, + [ + r"type: 
.*/001_pgbench_pipeline_serializable", + r"actually processed: (\d+)/\1", + ], + [], + "working \\startpipeline with serializable", + { + "001_pgbench_pipeline_serializable": ( + "\n-- test startpipeline with serializable\n\\startpipeline\n" + "BEGIN ISOLATION LEVEL SERIALIZABLE;\n" + + select_ten + + "END;\n\\endpipeline\n" + ) + }, + ) + + +def _test_pipeline_errors(node): + """The simple pipeline misuse errors (twice/no-start/gset/unclosed).""" + cases = [ + ( + "001_pgbench_pipeline_2", + r"already in pipeline mode", + "error: call \\startpipeline twice", + "\n-- startpipeline twice\n\\startpipeline\n\\startpipeline\n", + "-t 1", + ), + ( + "001_pgbench_pipeline_3", + r"not in pipeline mode", + "error: \\endpipeline with no start", + "\n-- pipeline not started\n\\endpipeline\n", + "-t 1", + ), + ( + "001_pgbench_pipeline_4", + r"gset is not allowed in pipeline mode", + "error: \\gset not allowed in pipeline mode", + "\n\\startpipeline\nselect 1 \\gset f\n\\endpipeline\n", + "-t 1", + ), + ( + "001_pgbench_pipeline_5", + r"end of script reached with pipeline open", + "error: call \\startpipeline without \\endpipeline in a single " + "transaction", + "\n-- startpipeline only with single transaction\n\\startpipeline\n", + "-t 1", + ), + ( + "001_pgbench_pipeline_6", + r"end of script reached with pipeline open", + "error: call \\startpipeline without \\endpipeline", + "\n-- startpipeline only\n\\startpipeline\n", + "-t 2", + ), + ( + "001_pgbench_pipeline_7", + r"end of script reached with pipeline open", + "error: call \\startpipeline and \\syncpipeline without \\endpipeline", + "\n-- startpipeline with \\syncpipeline only\n\\startpipeline\n" + "\\syncpipeline\n", + "-t 2", + ), + ] + for name, err, msg, script, topt in cases: + node.pgbench( + "{} -n -M extended".format(topt), + 2, + [], + [err], + msg, + {name: script}, + ) + + +def _test_pipeline_implicit_xact(node): + """SET LOCAL / REINDEX / VACUUM / subtrans / LOCK in a pipeline xact.""" + cases = [ + ( + 
"001_pgbench_pipeline_set_local_1", + 0, + [r"WARNING: SET LOCAL can only be used in transaction blocks"], + "SET LOCAL outside implicit transaction block of pipeline", + "\n\\startpipeline\nSET LOCAL statement_timeout='1h';\n\\endpipeline\n", + ), + ( + "001_pgbench_pipeline_set_local_2", + 0, + [_RE_EMPTY], + "SET LOCAL inside implicit transaction block of pipeline", + "\n\\startpipeline\nSELECT 1;\nSET LOCAL statement_timeout='1h';\n" + "\\endpipeline\n", + ), + ( + "001_pgbench_pipeline_set_local_3", + 0, + [r"WARNING: SET LOCAL can only be used in transaction blocks"], + "SET LOCAL and \\syncpipeline", + "\n\\startpipeline\nSELECT 1;\n\\syncpipeline\n" + "SET LOCAL statement_timeout='1h';\n\\endpipeline\n", + ), + ( + "001_pgbench_pipeline_reindex_1", + 0, + [], + "REINDEX CONCURRENTLY outside implicit transaction block of pipeline", + "\n\\startpipeline\nREINDEX TABLE CONCURRENTLY pgbench_accounts;\n" + "SELECT 1;\n\\endpipeline\n", + ), + ( + "001_pgbench_pipeline_reindex_2", + 2, + [], + "error: REINDEX CONCURRENTLY inside implicit transaction block of " + "pipeline", + "\n\\startpipeline\nSELECT 1;\n" + "REINDEX TABLE CONCURRENTLY pgbench_accounts;\n\\endpipeline\n", + ), + ( + "001_pgbench_pipeline_vacuum_1", + 0, + [], + "VACUUM outside implicit transaction block of pipeline", + "\n\\startpipeline\nVACUUM pgbench_accounts;\n\\endpipeline\n", + ), + ( + "001_pgbench_pipeline_vacuum_2", + 2, + [], + "error: VACUUM inside implicit transaction block of pipeline", + "\n\\startpipeline\nSELECT 1;\nVACUUM pgbench_accounts;\n" + "\\endpipeline\n", + ), + ( + "001_pgbench_pipeline_subtrans", + 2, + [], + "error: subtransactions not allowed in pipeline", + "\n\\startpipeline\nSAVEPOINT a;\nSELECT 1;\nROLLBACK TO SAVEPOINT a;\n" + "SELECT 2;\n\\endpipeline\n", + ), + ( + "001_pgbench_pipeline_lock_1", + 2, + [], + "error: LOCK TABLE outside implicit transaction block of pipeline", + "\n\\startpipeline\nLOCK pgbench_accounts;\nSELECT 1;\n\\endpipeline\n", + ), + ( + 
"001_pgbench_pipeline_lock_2", + 0, + [], + "LOCK TABLE inside implicit transaction block of pipeline", + "\n\\startpipeline\nSELECT 1;\nLOCK pgbench_accounts;\n\\endpipeline\n", + ), + ] + for name, status, err, msg, script in cases: + node.pgbench("-t 1 -n -M extended", status, [], err, msg, {name: script}) + + +def _test_errors_table(node): + """The large table of expression and meta-command errors.""" + base = ( + "-n -t 1 -Dfoo=bla -Dnull=null -Dtrue=true -Done=1 -Dzero=0.0 " + "-Dbadtrue=trueXXX -Dmaxint=9223372036854775807 " + "-Dminint=-9223372036854775808 -M prepared" + ) + for name, status, err, script in _ERRORS: + assert status != 0, 'invalid expected status for test "{}"'.format(name) + fname = "001_pgbench_error_" + name.replace(" ", "_") + out = [_RE_EMPTY] if status == 1 else [r"processed: 0/1"] + node.pgbench( + base, status, out, err, "pgbench script error: " + name, {fname: script} + ) + + +def _test_throttling(node): + """--rate / --latency-limit throttling, including late throttling.""" + node.pgbench( + "-t 100 -S --rate=100000 --latency-limit=1000000 -c 2 -n -r", + 0, + [r"processed: 200/200", r"builtin: select only"], + [_RE_EMPTY], + "pgbench throttling", + ) + node.pgbench( + "-t 10 --rate=100000 --latency-limit=1 -n -r", + 0, + [ + r"processed: [01]/10", + r"type: .*/001_pgbench_sleep", + r"above the 1.0 ms latency limit: [01]/", + ], + [_RE_EMPTY], + "pgbench late throttling", + {"001_pgbench_sleep": "\\sleep 2ms"}, + ) + + +def _test_logs(node): + """--log sampling and per-thread log file format.""" + bdir = node.basedir + node.pgbench( + "-n -S -t 50 -c 2 --log --sampling-rate=0.5", + 0, + [r"select only", r"processed: 100/100"], + [_RE_EMPTY], + "pgbench logs", + None, + "--log-prefix={}/001_pgbench_log_2".format(bdir), + ) + _check_pgbench_logs( + node, "001_pgbench_log_2", 1, 8, 92, r"^[01] \d{1,2} \d+ \d \d+ \d+$" + ) + node.pgbench( + "-n -b select-only -t 10 -l", + 0, + [r"select only", r"processed: 10/10"], + [_RE_EMPTY], + 
"pgbench logs contents", + None, + "--log-prefix={}/001_pgbench_log_3".format(bdir), + ) + _check_pgbench_logs( + node, "001_pgbench_log_3", 1, 10, 10, r"^0 \d{1,2} \d+ \d \d+ \d+$" + ) + node.pgbench( + "--no-vacuum", + 2, + [r"processed: 1/10"], + [ + r"client 0 aborted: end of script reached without completing the " + r"last transaction" + ], + "incomplete transaction block", + {"001_pgbench_incomplete_transaction_block": "BEGIN;SELECT 1;"}, + ) + + +def _test_retry(node): + """Serialization and deadlock errors with --max-tries retry.""" + node.safe_psql( + "CREATE UNLOGGED TABLE first_client_table (value integer); " + "CREATE UNLOGGED TABLE xy (x integer, y integer); " + "INSERT INTO xy VALUES (1, 2);" + ) + serial_err = ( + r"(?s)(client (0|1) sending UPDATE xy SET y = y \+ -?\d+\b).*" + r"client \2 got an error in command 3 \(SQL\) of script 0; " + r"ERROR: could not serialize access due to concurrent update\b.*" + r"\1" + ) + old_opts = os.environ.get("PGOPTIONS") + os.environ["PGOPTIONS"] = "-c default_transaction_isolation=repeatable\\ read" + try: + node.pgbench( + "-n -c 2 -t 1 --debug --verbose-errors --max-tries 2", + 0, + [ + r"processed: 2/2\b", + r"number of transactions retried: 1\b", + r"total number of retries: 1\b", + ], + [serial_err], + "concurrent update with retrying", + {"001_pgbench_serialization": _SERIALIZATION_SCRIPT}, + ) + finally: + _restore_pgoptions(old_opts) + node.safe_psql("DELETE FROM first_client_table;") + + deadlock_err = ( + r"client (0|1) got an error in command (3|5) \(SQL\) of script 0; " + r"ERROR: deadlock detected\b" + ) + os.environ["PGOPTIONS"] = "-c default_transaction_isolation=read\\ committed" + try: + node.pgbench( + "-n -c 2 -t 1 --max-tries 2 --verbose-errors", + 0, + [ + r"processed: 2/2\b", + r"number of transactions retried: 1\b", + r"total number of retries: 1\b", + ], + [deadlock_err], + "deadlock with retrying", + {"001_pgbench_deadlock": _DEADLOCK_SCRIPT}, + ) + finally: + _restore_pgoptions(old_opts) 
+ node.safe_psql("DROP TABLE first_client_table, xy;") + + +def _restore_pgoptions(old_opts): + """Restore (or clear) the PGOPTIONS environment variable.""" + if old_opts is None: + os.environ.pop("PGOPTIONS", None) + else: + os.environ["PGOPTIONS"] = old_opts + + +def _test_exit_on_abort_and_copy(node): + """--exit-on-abort aborts the run; COPY in a script is rejected.""" + node.safe_psql("CREATE TABLE counter(i int); INSERT INTO counter VALUES (0);") + node.pgbench( + "-t 10 -c 2 -j 2 --exit-on-abort", + 2, + [], + [r"division by zero", r"Run was aborted due to an error in thread"], + "test --exit-on-abort", + { + "001_exit_on_abort": ( + "\nupdate counter set i = i+1 returning i \\gset\n" + "\\if :i = 5\n\\set y 1/0\n\\endif\n" + ) + }, + ) + node.pgbench( + "-t 10", + 2, + [], + [r"COPY is not supported in pgbench, aborting"], + "Test copy in script", + {"001_copy": " COPY pgbench_accounts FROM stdin "}, + ) + node.safe_psql("DROP TABLE counter;") + + +def _test_continue_on_error(node): + """--continue-on-error keeps running past per-transaction failures.""" + node.safe_psql("CREATE TABLE unique_table(i int unique);") + node.pgbench( + "-n -t 10 --continue-on-error --failures-detailed", + 0, + [r"processed: 1/10\b", r"other failures: 9\b"], + [], + "test --continue-on-error", + {"001_continue_on_error": "\n\t\tINSERT INTO unique_table VALUES(0);\n\t\t"}, + ) + node.safe_psql("DROP TABLE unique_table;") + + +def test_001_pgbench_with_server(create_pg, pg_bin): + """pgbench end-to-end behavior against a live server.""" + node = create_pg("main", start=False, extra=["--locale", "C"]) + node.start() + ts_dir = node.basedir / "regress_pgbench_tap_1_ts_dir" + ts_dir.mkdir() + node.safe_psql( + "CREATE TABLESPACE regress_pgbench_tap_1_ts LOCATION '{}';".format(ts_dir) + ) + + _test_init_and_basic(node, "regress_pgbench_tap_1_ts") + _test_builtin_scripts(node) + nthreads = _detect_nthreads(pg_bin) + _test_custom_scripts(node, nthreads) + _test_param_logging(node) + 
_test_expressions(node) + _test_nested_ifs(node) + _test_seeded_random_determinism(node) + _test_backslash_commands(node) + _test_gset_aset(node) + _test_pipelines(node) + _test_errors_table(node) + _test_throttling(node) + _test_logs(node) + _test_retry(node) + _test_exit_on_abort_and_copy(node) + _test_continue_on_error(node) + + node.safe_psql("DROP TABLESPACE regress_pgbench_tap_1_ts") + node.stop() diff --git a/src/bin/pgbench/pyt/test_002_pgbench_no_server.py b/src/bin/pgbench/pyt/test_002_pgbench_no_server.py new file mode 100644 index 0000000000000..6dfe75a636d78 --- /dev/null +++ b/src/bin/pgbench/pyt/test_002_pgbench_no_server.py @@ -0,0 +1,271 @@ +# Copyright (c) 2021-2026, PostgreSQL Global Development Group + +"""Port of src/bin/pgbench/t/002_pgbench_no_server.pl. + +pgbench tests which do not need a server. +""" + + +# (name, options string, stderr regex checks). pgbench prints usage/errors to +# stderr and nothing to stdout for these. +_OPTIONS = [ + ( + "bad option", + "-h home -p 5432 -U calvin ---debug --bad-option", + [r"--help.*more information"], + ), + ("no file", "-f no-such-file", [r'could not open file "no-such-file":']), + ("no builtin", "-b no-such-builtin", [r'no builtin script .* "no-such-builtin"']), + ( + "invalid weight", + "--builtin=select-only@one", + [r"invalid weight specification: \@one"], + ), + ("invalid weight", "-b select-only@-1", [r"weight spec.* out of range .*: -1"]), + ("too many scripts", "-S " * 129, [r"at most 128 SQL scripts"]), + ("bad #clients", "-c three", [r'invalid value "three" for option -c/--clients']), + ("bad #threads", "-j eleven", [r'invalid value "eleven" for option -j/--jobs']), + ("bad scale", "-i -s two", [r'invalid value "two" for option -s/--scale']), + ( + "invalid #transactions", + "-t zil", + [r'invalid value "zil" for option -t/--transactions'], + ), + ("invalid duration", "-T ten", [r'invalid value "ten" for option -T/--time']), + ( + "-t XOR -T", + "-N -l --aggregate-interval=5 
--log-prefix=notused -t 1000 -T 1", + [r"specify either "], + ), + ( + "-T XOR -t", + "-P 1 --progress-timestamp -l --sampling-rate=0.001 -T 10 -t 1000", + [r"specify either "], + ), + ("bad variable", "--define foobla", [r"invalid variable definition"]), + ("invalid fillfactor", "-F 1", [r"-F/--fillfactor must be in range"]), + ("invalid query mode", "-M no-such-mode", [r"invalid query mode"]), + ("invalid progress", "--progress=0", [r"-P/--progress must be in range"]), + ("invalid rate", "--rate=0.0", [r"invalid rate limit"]), + ("invalid latency", "--latency-limit=0.0", [r"invalid latency limit"]), + ("invalid sampling rate", "--sampling-rate=0", [r"invalid sampling rate"]), + ( + "invalid aggregate interval", + "--aggregate-interval=-3", + [r"--aggregate-interval must be in range"], + ), + ("weight zero", "-b se@0 -b si@0 -b tpcb@0", [r"weight must not be zero"]), + ("init vs run", "-i -S", [r"cannot be used in initialization"]), + ("run vs init", "-S -F 90", [r"cannot be used in benchmarking"]), + ("ambiguous builtin", "-b s", [r"ambiguous"]), + ( + "--progress-timestamp => --progress", + "--progress-timestamp", + [r"allowed only under"], + ), + ("-I without init option", "-I dtg", [r"cannot be used in benchmarking mode"]), + ( + "invalid init step", + "-i -I dta", + [r"unrecognized initialization step", r"Allowed step characters are"], + ), + ( + "bad random seed", + "--random-seed=one", + [ + r'unrecognized random seed option "one"', + r'Expecting an unsigned integer, "time" or "rand"', + r"error while setting random seed from --random-seed option", + ], + ), + ( + "bad partition method", + "-i --partition-method=BAD", + [r'"range"', r'"hash"', r'"BAD"'], + ), + ("bad partition number", "-i --partitions -1", [r"--partitions must be in range"]), + ( + "partition method without partitioning", + "-i --partition-method=hash", + [r"partition-method requires greater than zero --partitions"], + ), + ( + "bad maximum number of tries", + "--max-tries -10", + 
[r'invalid number of maximum tries: "-10"'], + ), + ( + "an infinite number of tries", + "--max-tries 0", + [ + r"an unlimited number of transaction tries can only be used with " + r"--latency-limit or a duration" + ], + ), + ("sampling => log", "--sampling-rate=0.01", [r"log sampling .* only when"]), + ( + "sampling XOR aggregate", + "-l --sampling-rate=0.1 --aggregate-interval=3", + [r"sampling .* aggregation .* cannot be used at the same time"], + ), + ("aggregate => log", "--aggregate-interval=3", [r"aggregation .* only when"]), + ("log-prefix => log", "--log-prefix=x", [r"prefix .* only when"]), + ( + "duration & aggregation", + "-l -T 1 --aggregate-interval=3", + [r"aggr.* not be higher"], + ), + ("duration % aggregation", "-l -T 5 --aggregate-interval=3", [r"multiple"]), +] + +# (name, stderr regex checks, {filename: contents}). +_SCRIPT_TESTS = [ + ("missing endif", [r"\\if without matching \\endif"], {"if-noendif.sql": "\\if 1"}), + ( + "missing if on elif", + [r"\\elif without matching \\if"], + {"elif-noif.sql": "\\elif 1"}, + ), + ( + "missing if on else", + [r"\\else without matching \\if"], + {"else-noif.sql": "\\else"}, + ), + ( + "missing if on endif", + [r"\\endif without matching \\if"], + {"endif-noif.sql": "\\endif"}, + ), + ( + "elif after else", + [r"\\elif after \\else"], + {"else-elif.sql": "\\if 1\n\\else\n\\elif 0\n\\endif"}, + ), + ( + "else after else", + [r"\\else after \\else"], + {"else-else.sql": "\\if 1\n\\else\n\\else\n\\endif"}, + ), + ( + "if syntax error", + [r'syntax error in command "if"'], + {"if-bad.sql": "\\if\n\\endif\n"}, + ), + ( + "elif syntax error", + [r'syntax error in command "elif"'], + {"elif-bad.sql": "\\if 0\n\\elif +\n\\endif\n"}, + ), + ( + "else syntax error", + [r'unexpected argument in command "else"'], + {"else-bad.sql": "\\if 0\n\\else BAD\n\\endif\n"}, + ), + ( + "endif syntax error", + [r'unexpected argument in command "endif"'], + {"endif-bad.sql": "\\if 0\n\\endif BAD\n"}, + ), + ( + "not enough 
arguments for least", + [r"at least one argument expected \(least\)"], + {"bad-least.sql": "\\set i least()\n"}, + ), + ( + "not enough arguments for greatest", + [r"at least one argument expected \(greatest\)"], + {"bad-greatest.sql": "\\set i greatest()\n"}, + ), + ( + "not enough arguments for hash", + [r"unexpected number of arguments \(hash\)"], + {"bad-hash-1.sql": "\\set i hash()\n"}, + ), + ( + "too many arguments for hash", + [r"unexpected number of arguments \(hash\)"], + {"bad-hash-2.sql": "\\set i hash(1,2,3)\n"}, + ), + ( + "bigint overflow 1", + [r"bigint constant overflow"], + {"overflow-1.sql": "\\set i 100000000000000000000\n"}, + ), + ( + "double overflow 2", + [r"double constant overflow"], + {"overflow-2.sql": "\\set d 1.0E309\n"}, + ), + ( + "double overflow 3", + [r"double constant overflow"], + {"overflow-3.sql": "\\set d .1E310\n"}, + ), + ("set i", [r"set i 1 ", r"\^ error found here"], {"set_i_op": "\\set i 1 +\n"}), + ( + "not enough arguments to permute", + [r"unexpected number of arguments \(permute\)"], + {"bad-permute-1.sql": "\\set i permute(1)\n"}, + ), + ( + "too many arguments to permute", + [r"unexpected number of arguments \(permute\)"], + {"bad-permute-2.sql": "\\set i permute(1, 2, 3, 4)\n"}, + ), +] + + +def _pgbench(pg_bin, opts, stat, out, err, name): + pg_bin.command_checks_all(["pgbench", *opts.split()], stat, out, err, name) + + +def _pgbench_scripts(pg_bin, testdir, stat, out, err, name, files): + cmd = ["pgbench"] + for fn in sorted(files): + filename = testdir / fn + filename.write_text(files[fn], encoding="utf-8") + cmd += ["--file", str(filename)] + pg_bin.command_checks_all(cmd, stat, out, err, name) + + +def test_pgbench_no_server(pg_bin, tmp_path): + """pgbench option-parsing and script-parsing errors (no server needed).""" + for name, opts, err_checks in _OPTIONS: + _pgbench(pg_bin, opts, 1, [r"^$"], err_checks, "pgbench option error: " + name) + + pg_bin.program_help_ok("pgbench") + 
pg_bin.program_version_ok("pgbench") + pg_bin.program_options_handling_ok("pgbench") + + _pgbench( + pg_bin, + "-b list", + 0, + [r"^$"], + [ + r"Available builtin scripts:", + r"tpcb-like", + r"simple-update", + r"select-only", + ], + "pgbench builtin list", + ) + + _pgbench( + pg_bin, + "--show-script se", + 0, + [r"^$"], + [ + r"select-only: ", + r"SELECT abalance FROM pgbench_accounts WHERE", + r"(?!UPDATE)", + r"(?!INSERT)", + ], + "pgbench builtin listing", + ) + + for name, err, files in _SCRIPT_TESTS: + _pgbench_scripts( + pg_bin, tmp_path, 1, [r"^$"], err, "pgbench option error: " + name, files + ) diff --git a/src/bin/psql/Makefile b/src/bin/psql/Makefile index be0032652cd78..2f3543712fe43 100644 --- a/src/bin/psql/Makefile +++ b/src/bin/psql/Makefile @@ -81,6 +81,7 @@ clean distclean: check: $(prove_check) + $(pytest_check) installcheck: $(prove_installcheck) diff --git a/src/bin/psql/meson.build b/src/bin/psql/meson.build index 922b28452672f..5fc3df0c702a9 100644 --- a/src/bin/psql/meson.build +++ b/src/bin/psql/meson.build @@ -80,6 +80,15 @@ tests += { 't/030_pager.pl', ], }, + 'pytest': { + 'env': {'with_readline': readline.found() ? 
'yes' : 'no'}, + 'tests': [ + 'pyt/test_001_basic.py', + 'pyt/test_010_tab_completion.py', + 'pyt/test_020_cancel.py', + 'pyt/test_030_pager.py', + ], + }, } subdir('po', if_found: libintl) diff --git a/src/bin/psql/pyt/test_001_basic.py b/src/bin/psql/pyt/test_001_basic.py new file mode 100644 index 0000000000000..ae0e3983678c5 --- /dev/null +++ b/src/bin/psql/pyt/test_001_basic.py @@ -0,0 +1,363 @@ +# Copyright (c) 2021-2026, PostgreSQL Global Development Group + +"""Port of src/bin/psql/t/001_basic.pl.""" + +import re + +from pypg import append_to_file, slurp_file + +_INSERT = "INSERT INTO tab_psql_single VALUES" + + +def _psql_like(node, sql, expected_stdout, name, on_error_stop=True): + result = node.psql_capture(sql, on_error_stop=on_error_stop) + assert result.rc == 0, "{}: exit code 0".format(name) + assert result.stderr == "", "{}: no stderr".format(name) + assert re.search(expected_stdout, result.stdout), "{}: matches".format(name) + + +def _psql_fails_like(node, sql, expected_stderr, name, replication=None): + result = node.psql_capture(sql, replication=replication) + assert result.rc != 0, "{}: exit code not 0".format(name) + assert re.search(expected_stderr, result.stderr), "{}: matches".format(name) + + +def test_basic(pg_bin, create_pg, tmp_path): + """psql meta-commands, timing, encoding, notify, crash, switches, pipelines.""" + pg_bin.program_help_ok("psql") + pg_bin.program_version_ok("psql") + pg_bin.program_options_handling_ok("psql") + + for arg in ("commands", "variables"): + result = pg_bin.result(["psql", "--help=" + arg]) + assert result.rc == 0, "psql --help={} exit code 0".format(arg) + assert result.stdout != "", "psql --help={} goes to stdout".format(arg) + assert result.stderr == "", "psql --help={} nothing to stderr".format(arg) + + node = create_pg("main", extra=["--locale=C", "--encoding=UTF8"], start=False) + node.append_conf( + "wal_level = 'logical'\nmax_replication_slots = 4\nmax_wal_senders = 4" + ) + node.start() + + 
_meta(node) + _crash_and_errverbose(node) + _switches(node, tmp_path) + _copy_default(node, tmp_path) + _watch(node) + _g_pipe(node, tmp_path) + _pipelines(node) + + +def _meta(node): + _psql_like(node, "\\copyright", r"Copyright", "\\copyright") + _psql_like(node, "\\help", r"ALTER", "\\help without arguments") + _psql_like(node, "\\help SELECT", r"SELECT", "\\help with argument") + + _psql_fails_like( + node, + "START_REPLICATION 0/0", + r"unexpected PQresultStatus: 8$", + "handling of unexpected PQresultStatus", + replication="database", + ) + + _psql_like( + node, + "\\timing on\nSELECT 1", + r"(?m)^1$\n^Time: \d+[.,]\d\d\d ms", + "\\timing with successful query", + ) + + result = node.psql_capture("\\timing on\nSELECT error") + assert result.rc != 0, "\\timing with query error: query failed" + assert re.search( + r"(?m)^Time: \d+[.,]\d\d\d ms", result.stdout + ), "\\timing with query error: timing output appears" + assert not re.search( + r"(?m)^Time: 0[.,]000 ms", result.stdout + ), "\\timing with query error: timing was updated" + + _psql_like( + node, + "\\echo :ENCODING\nset client_encoding = LATIN1;\n\\echo :ENCODING", + r"(?m)^UTF8$\n^LATIN1$", + "ENCODING variable is set and updated", + ) + + _psql_like( + node, + "LISTEN foo;\nNOTIFY foo;", + r'^Asynchronous notification "foo" received from server process ' + r"with PID \d+\.$", + "notification", + ) + _psql_like( + node, + "LISTEN foo;\nNOTIFY foo, 'bar';", + r'^Asynchronous notification "foo" with payload "bar" received from ' + r"server process with PID \d+\.$", + "notification with payload", + ) + + +def _crash_and_errverbose(node): + result = node.psql_capture( + "SELECT 'before' AS running;\n" + "SELECT pg_terminate_backend(pg_backend_pid());\n" + "SELECT 'AFTER' AS not_running;\n" + ) + assert result.rc == 2, "server crash: psql exit code" + assert re.search(r"before", result.stdout), "server crash: output before crash" + assert not re.search(r"AFTER", result.stdout), "server crash: no output 
after crash" + assert re.search( + r"psql::2: FATAL: terminating connection due to administrator " + r"command\n" + r"psql::2: server closed the connection unexpectedly\n" + r"\tThis probably means the server terminated abnormally\n" + r"\tbefore or while processing the request\.\n" + r"psql::2: error: connection to server was lost", + result.stderr, + ), "server crash: error message" + + _psql_like( + node, + "SELECT 1;\n\\errverbose", + r"^1\nThere is no previous error\.$", + "\\errverbose with no previous error", + ) + + errverbose = ( + r"(?m)\A^psql::{0}: ERROR: .*$\n" + r"^LINE 1: SELECT error{1}$\n" + r"^ *^.*$\n" + r"^psql::{2}: error: ERROR: [0-9A-Z]{{5}}: .*$\n" + r"^LINE 1: SELECT error{1}$\n" + r"^ *^.*$\n" + r"^LOCATION: .*$" + ) + assert re.search( + errverbose.format(1, ";", 2), + node.psql_capture("SELECT error;\n\\errverbose", on_error_stop=False).stderr, + ), "\\errverbose after normal query with error" + assert re.search( + errverbose.format(2, ";", 3), + node.psql_capture( + "\\set FETCH_COUNT 1\nSELECT error;\n\\errverbose", on_error_stop=False + ).stderr, + ), "\\errverbose after FETCH_COUNT query with error" + assert re.search( + errverbose.format(1, "", 2), + node.psql_capture( + "SELECT error\\gdesc\n\\errverbose", on_error_stop=False + ).stderr, + ), "\\errverbose after \\gdesc with error" + + +def _switches(node, tmp_path): + node.safe_psql("CREATE TABLE tab_psql_single (a int);") + nonexistent = str(tmp_path / "nonexistent") + copy_cmd = "\\copy tab_psql_single FROM '{}'".format(nonexistent) + base = ["psql", "--no-psqlrc", "--single-transaction"] + stop = ["--set", "ON_ERROR_STOP=1"] + + def count(): + return node.safe_psql("SELECT count(*) FROM tab_psql_single") + + node.command_ok( + base + stop + ["-c", _INSERT + " (1)", "-c", _INSERT + " (2)"], + "ON_ERROR_STOP, --single-transaction and multiple -c switches", + ) + assert count() == "2", "--single-transaction commits with ON_ERROR_STOP, -c" + + node.command_fails( + base + stop + 
["-c", _INSERT + " (3)", "-c", copy_cmd], + "ON_ERROR_STOP, --single-transaction and multiple -c switches, error", + ) + assert count() == "2", "client-side error rolls back with ON_ERROR_STOP, -c" + + insert_file = str(tmp_path / "tab_insert.sql") + copy_file = str(tmp_path / "tab_copy.sql") + append_to_file(insert_file, _INSERT + " (4);") + append_to_file(copy_file, copy_cmd + ";") + + node.command_ok( + base + stop + ["-f", insert_file, "-f", insert_file], + "ON_ERROR_STOP, --single-transaction and multiple -f switches", + ) + assert count() == "4", "--single-transaction commits with ON_ERROR_STOP, -f" + + node.command_fails( + base + stop + ["-f", insert_file, "-f", copy_file], + "ON_ERROR_STOP, --single-transaction and multiple -f switches, error", + ) + assert count() == "4", "client-side error rolls back with ON_ERROR_STOP, -f" + + node.command_fails( + base + ["-f", insert_file, "-f", insert_file, "-c", copy_cmd], + "no ON_ERROR_STOP, --single-transaction and multiple -f/-c switches", + ) + assert count() == "6", "client-side error commits, no ON_ERROR_STOP, -f/-c" + + node.command_ok( + base + ["-f", insert_file, "-f", insert_file, "-f", copy_file], + "no ON_ERROR_STOP, --single-transaction and multiple -f switches", + ) + assert count() == "8", "client-side error commits, no ON_ERROR_STOP, -f" + + node.command_ok( + base + ["-c", _INSERT + " (5)", "-f", copy_file, "-c", _INSERT + " (6)"], + "no ON_ERROR_STOP, --single-transaction and multiple -c switches", + ) + assert count() == "10", "client-side error commits, no ON_ERROR_STOP, -c" + + +def _copy_default(node, tmp_path): + node.safe_psql( + "CREATE TABLE copy_default (" + "id integer PRIMARY KEY, " + "text_value text NOT NULL DEFAULT 'test', " + "ts_value timestamp without time zone NOT NULL DEFAULT '2022-07-05')" + ) + csv = str(tmp_path / "copy_default.csv") + append_to_file(csv, "1,value,2022-07-04\n") + append_to_file(csv, "2,placeholder,2022-07-03\n") + append_to_file(csv, 
"3,placeholder,placeholder\n") + _psql_like( + node, + "\\copy copy_default from {} with (format 'csv', default 'placeholder');\n" + "SELECT * FROM copy_default".format(csv), + "1\\|value\\|2022-07-04 00:00:00\n" + "2|test|2022-07-03 00:00:00\n" + "3|test|2022-07-05 00:00:00", + "\\copy from with DEFAULT", + ) + + +def _watch(node): + _psql_like(node, "SELECT 1 \\watch c=3 i=0.01", r"1\n1\n1", "\\watch 3x i=0.01") + _psql_like(node, "SELECT 1 \\watch c=3 i=0.0001", r"1\n1\n1", "\\watch 3x i=0.0001") + _psql_like( + node, "\\set WATCH_INTERVAL 0\nSELECT 1 \\watch c=3", r"1\n1\n1", "\\watch i=0" + ) + _psql_fails_like( + node, "SELECT 3 \\watch m=x", r"incorrect minimum row count", "\\watch bad m" + ) + _psql_fails_like( + node, + "SELECT 3 \\watch m=1 min_rows=2", + r"minimum row count specified more than once", + "\\watch m twice", + ) + _psql_like( + node, + "with x as (\n" + " select now()-backend_start AS howlong\n" + " from pg_stat_activity\n" + " where pid = pg_backend_pid()\n" + ") select 123 from x where howlong < '2 seconds' \\watch i=0.5 m=2", + r"^123$", + "\\watch, 2 minimum rows", + ) + for spec, msg in ( + ("-10", r'incorrect interval value "-10"'), + ("10ab", r'incorrect interval value "10ab"'), + ("10e400", r'incorrect interval value "10e400"'), + ("1 1", r"interval value is specified more than once"), + ("c=1 c=1", r"iteration count is specified more than once"), + ): + _psql_fails_like(node, "SELECT 1 \\watch " + spec, msg, "\\watch " + spec) + + _psql_like( + node, + "\\echo :WATCH_INTERVAL\n\\set WATCH_INTERVAL 10\n\\echo :WATCH_INTERVAL\n" + "\\unset WATCH_INTERVAL\n\\echo :WATCH_INTERVAL", + r"(?m)^2$\n^10$\n^2$", + "WATCH_INTERVAL variable is set and updated", + ) + _psql_fails_like( + node, "\\set WATCH_INTERVAL 1e500", r"is out of range", "WATCH_INTERVAL range" + ) + _psql_like(node, "\\echo :WATCH_INTERVAL", r"(?m)^2$", "WATCH_INTERVAL not altered") + + +def _g_pipe(node, tmp_path): + g_file = str(tmp_path / "g_file_1.out") + pipe = "cat 
>{}".format(g_file) + + _psql_like(node, "SELECT 'one' \\g | {}".format(pipe), r"", "one command \\g") + assert re.search(r"one", slurp_file(g_file)) + + _psql_like( + node, + "SELECT 'two' \\; SELECT 'three' \\g | {}".format(pipe), + r"", + "two commands \\g", + ) + assert re.search(r"two.*three", slurp_file(g_file), re.S) + + _psql_like( + node, + "\\set SHOW_ALL_RESULTS 0\nSELECT 'four' \\; SELECT 'five' \\g | {}".format( + pipe + ), + r"", + "two commands \\g with only last result", + ) + c3 = slurp_file(g_file) + assert re.search(r"five", c3) + assert not re.search(r"four", c3) + + _psql_like( + node, + "copy (values ('foo'),('bar')) to stdout \\g | {}".format(pipe), + r"", + "copy output passed to \\g pipe", + ) + assert re.search(r"foo.*bar", slurp_file(g_file), re.S) + + +def _pipelines(node): + aborts = r"COPY in a pipeline is not supported, aborting connection" + node.safe_psql("CREATE TABLE psql_pipeline()") + log_location = node.log.stat().st_size + + _psql_fails_like( + node, + "\\startpipeline\nCOPY psql_pipeline FROM STDIN;\nSELECT 'val1';\n" + "\\syncpipeline\n\\endpipeline", + aborts, + "COPY FROM in pipeline: fails", + ) + node.wait_for_log( + r"FATAL: .*terminating connection because protocol synchronization was lost", + log_location, + ) + + _psql_fails_like( + node, + "\\startpipeline\nCOPY psql_pipeline TO STDOUT;\nSELECT 'val1';\n\\endpipeline", + aborts, + "COPY TO in pipeline: fails", + ) + _psql_fails_like( + node, + "\\startpipeline\n\\copy psql_pipeline from stdin;\nSELECT 'val1';\n" + "\\syncpipeline\n\\endpipeline", + aborts, + "\\copy from in pipeline: fails", + ) + _psql_fails_like( + node, + "\\startpipeline\n\\copy psql_pipeline to stdout;\n\\syncpipeline\n" + "\\endpipeline", + aborts, + "\\copy to in pipeline: fails", + ) + + _psql_fails_like( + node, + "\\restrict test\n\\! 
should_fail", + r"backslash commands are restricted; only \\unrestrict is allowed", + "meta-command in restrict mode fails", + ) diff --git a/src/bin/psql/pyt/test_010_tab_completion.py b/src/bin/psql/pyt/test_010_tab_completion.py new file mode 100644 index 0000000000000..1036de40714c9 --- /dev/null +++ b/src/bin/psql/pyt/test_010_tab_completion.py @@ -0,0 +1,533 @@ +# Copyright (c) 2021-2026, PostgreSQL Global Development Group + +"""Port of src/bin/psql/t/010_tab_completion.pl. + +Drives an interactive (PTY) psql session and checks that readline tab +completion responds as expected: command/keyword completion, table and +schema-qualified name completion (with quoting and case folding), filename and +enum/timezone label completion, GUC and psql-variable completion, and the +FOR PORTION OF column completions for DELETE/UPDATE. + +Like the Perl original this does nothing unless the build is --with-readline +(the with_readline env var is 'yes'), is not disabled via SKIP_READLINE_TESTS, +and a PTY is available (not Windows). 
+""" + +import os +import re +import sys +import tempfile + +import pytest + + +def _check_completion(handle, send, pattern, annotation): + """Send input, wait for the accumulated terminal output to match pattern.""" + regex = pattern if hasattr(pattern, "search") else re.compile(pattern) + out = handle.query_until(regex, send) + assert regex.search(out) and not handle.timed_out, annotation + + +def _clear_query(handle): + """Clear the query buffer to start over (won't work inside a string).""" + _check_completion( + handle, + "\\r\n", + re.compile(r"Query buffer reset.*postgres=# ", re.S), + "\\r works", + ) + + +def _clear_line(handle): + """Clear the current line to start over (works in an incomplete literal).""" + _check_completion(handle, "\025\n", r"postgres=# ", "control-U works") + + +def _basic_completions(handle): + """Command, table-name, case-folding and quoted-name completions.""" + # check basic command completion: SEL produces SELECT + _check_completion(handle, "SEL\t", r"SELECT ", "complete SEL to SELECT") + + _clear_query(handle) + + # check case variation is honored + _check_completion(handle, "sel\t", r"select ", "complete sel to select") + + # check basic table name completion + _check_completion(handle, "* from t\t", r"\* from tab1 ", "complete t to tab1") + + _clear_query(handle) + + # check table name completion with multiple alternatives + # note: readline might print a bell before the completion + _check_completion( + handle, + "select * from my\t", + r"select \* from my\a?tab", + "complete my to mytab when there are multiple choices", + ) + + # some versions of readline/libedit require two tabs here, some only need one + _check_completion( + handle, + "\t\t", + r"mytab123 +mytab246", + "offer multiple table choices", + ) + + _check_completion( + handle, "2\t", r"246 ", "finish completion of one of multiple table choices" + ) + + _clear_query(handle) + + +def _quoted_completions(handle): + """Quoted, mixed-case, case-folded and 
schema-qualified name completions.""" + # check handling of quoted names + _check_completion( + handle, + 'select * from "my\t', + r'select \* from "my\a?tab', + 'complete "my to "mytab when there are multiple choices', + ) + + _check_completion( + handle, + "\t\t", + r'"mytab123" +"mytab246"', + "offer multiple quoted table choices", + ) + + _check_completion( + handle, + "2\t", + r'246" ', + "finish completion of one of multiple quoted table choices", + ) + + _clear_query(handle) + + # check handling of mixed-case names + _check_completion( + handle, 'select * from "mi\t', r'"mixedName" ', "complete a mixed-case name" + ) + + _clear_query(handle) + + # check case folding + _check_completion( + handle, "select * from TAB\t", r"tab1 ", "automatically fold case" + ) + + _clear_query(handle) + + # check case-sensitive keyword replacement + # note: various versions of readline/libedit handle backspacing + # differently, so just check that the replacement comes out correctly + _check_completion(handle, "\\DRD\t", r"drds ", "complete \\DRD to \\drds") + + _clear_query(handle) + + # check completion of a schema-qualified name + _check_completion( + handle, "select * from pub\t", r"public\.", "complete schema when relevant" + ) + + _check_completion(handle, "tab\t", r"tab1 ", "complete schema-qualified name") + + _clear_query(handle) + + _check_completion( + handle, + "select * from PUBLIC.t\t", + r"public\.tab1 ", + "automatically fold case in schema-qualified name", + ) + + _clear_query(handle) + + +def _refname_completions(handle): + """Completions that interpret referenced (constraint/qualified) names.""" + # check interpretation of referenced names + _check_completion( + handle, + "alter table tab1 drop constraint t\t", + r"tab1_pkey ", + "complete index name for referenced table", + ) + + _clear_query(handle) + + _check_completion( + handle, + "alter table TAB1 drop constraint t\t", + r"tab1_pkey ", + "complete index name for referenced table, with downcasing", + ) 
+ + _clear_query(handle) + + _check_completion( + handle, + 'alter table public."tab1" drop constraint t\t', + r"tab1_pkey ", + "complete index name for referenced table, with schema and quoting", + ) + + _clear_query(handle) + + # check variant where we're completing a qualified name from a refname + # (this one also checks successful completion in a multiline command) + _check_completion( + handle, + "comment on constraint tab1_pkey \n on public.\t", + r"public\.tab1", + "complete qualified name from object reference", + ) + + _clear_query(handle) + + +def _filename_completions(handle): + """Filename completions for \\lo_import and COPY (quoted).""" + # check filename completion + _check_completion( + handle, + "\\lo_import tab_comp_dir/some\t", + r"tab_comp_dir/somefile ", + "filename completion with one possibility", + ) + + _clear_query(handle) + + # note: readline might print a bell before the completion + _check_completion( + handle, + "\\lo_import tab_comp_dir/af\t", + r"tab_comp_dir/af\a?ile", + "filename completion with multiple possibilities", + ) + + # here we are inside a string literal 'afile*', so must use clear_line(). + _clear_line(handle) + + # COPY requires quoting + _check_completion( + handle, + "COPY foo FROM tab_comp_dir/some\t", + r"'tab_comp_dir/somefile' ", + "quoted filename completion with one possibility", + ) + + _clear_query(handle) + + _check_completion( + handle, + "COPY foo FROM tab_comp_dir/af\t", + r"'tab_comp_dir/afile", + "quoted filename completion with multiple possibilities", + ) + + # some versions of readline/libedit require two tabs here, some only need + # one; also, some will offer the whole path name and some just the file + # name; the quotes might appear, too + _check_completion( + handle, + "\t\t", + r"afile123'? 
+'?(tab_comp_dir/)?afile456", + "offer multiple file choices", + ) + + _clear_line(handle) + + +def _enum_tz_completions(handle): + """Enum label and timezone name completions.""" + # check enum label completion + # some versions of readline/libedit require two tabs here, some only need + # one; also, some versions will offer quotes, some will not + _check_completion( + handle, + "ALTER TYPE enum1 RENAME VALUE 'ba\t\t", + r"'?bar'? +'?baz'?", + "offer multiple enum choices", + ) + + _clear_line(handle) + + # enum labels are case sensitive, so this should complete BLACK immediately + _check_completion( + handle, + "ALTER TYPE enum1 RENAME VALUE 'B\t", + r"BLACK", + "enum labels are case sensitive", + ) + + _clear_line(handle) + + # check timezone name completion + _check_completion( + handle, "SET timezone TO am\t", r"'America/", "offer partial timezone name" + ) + + _check_completion(handle, "new_\t", r"New_York", "complete partial timezone name") + + _clear_line(handle) + + +def _keyword_case_completions(handle): + """Keyword offered with object names obeys COMP_KEYWORD_CASE; plus more.""" + # check completion of a keyword offered in addition to object names; + # such a keyword should obey COMP_KEYWORD_CASE + for case, in_, out in ( + ("lower", "CO", "column"), + ("upper", "co", "COLUMN"), + ("preserve-lower", "co", "column"), + ("preserve-upper", "CO", "COLUMN"), + ): + _check_completion( + handle, + "\\set COMP_KEYWORD_CASE {case}\n".format(case=case), + r"postgres=#", + "set completion case to '{case}'".format(case=case), + ) + _check_completion( + handle, + "alter table tab1 rename {in_}\t\t\t".format(in_=in_), + out, + "offer keyword {out} for input {in_}, " + "COMP_KEYWORD_CASE = {case}".format(out=out, in_=in_, case=case), + ) + _clear_query(handle) + + # alternate path where keyword comes from SchemaQuery + _check_completion( + handle, + "DROP TYPE big\t", + r"DROP TYPE bigint ", + "offer keyword from SchemaQuery", + ) + + _clear_query(handle) + + # check 
create_command_generator + _check_completion( + handle, "CREATE TY\t", r"CREATE TYPE ", "check create_command_generator" + ) + + _clear_query(handle) + + # check words_after_create infrastructure + _check_completion( + handle, + "CREATE TABLE mytab\t\t", + r"mytab123 +mytab246", + "check words_after_create", + ) + + _clear_query(handle) + + # check VersionedQuery infrastructure + _check_completion( + handle, + "DROP PUBLIC\t \t\t", + r"DROP PUBLICATION\s+some_publication ", + "check VersionedQuery", + ) + + _clear_query(handle) + + # hits ends_with() and logic for completing in multi-line queries + _check_completion( + handle, "analyze (\n\t\t", r"VERBOSE", "check ANALYZE (VERBOSE ..." + ) + + _clear_query(handle) + + +def _guc_var_completions(handle): + """GUC name/value and psql-variable completions.""" + # check completions for GUCs + _check_completion( + handle, "set interval\t\t", r"intervalstyle TO", "complete a GUC name" + ) + _check_completion(handle, " iso\t", r"iso_8601 ", "complete a GUC enum value") + + _clear_query(handle) + + # same, for qualified GUC names + _check_completion( + handle, + "DO $$begin end$$ LANGUAGE plpgsql;\n", + r"postgres=# ", + "load plpgsql extension", + ) + + _check_completion( + handle, "set plpg\t", r"plpg\a?sql\.", "complete prefix of a GUC name" + ) + _check_completion( + handle, "var\t\t", r"variable_conflict TO", "complete a qualified GUC name" + ) + _check_completion( + handle, " USE_C\t", r"use_column", "complete a qualified GUC enum value" + ) + + _clear_query(handle) + + # check completions for psql variables + _check_completion( + handle, "\\set VERB\t", r"VERBOSITY ", "complete a psql variable name" + ) + _check_completion(handle, "def\t", r"default ", "complete a psql variable value") + + _clear_query(handle) + + _check_completion( + handle, + "\\echo :VERB\t", + r":VERBOSITY ", + "complete an interpolated psql variable name", + ) + + _clear_query(handle) + + # check completion for psql variable test + 
_check_completion( + handle, + "\\echo :{?VERB\t", + r":\{\?VERBOSITY} ", + "complete a psql variable test", + ) + + _clear_query(handle) + + # check no-completions code path + _check_completion(handle, "blarg \t\t", r"", "check completion failure path") + + _clear_query(handle) + + # check COPY FROM with DEFAULT option + _check_completion( + handle, + "COPY foo FROM stdin WITH ( DEF\t)", + r"DEFAULT ", + "COPY FROM with DEFAULT completion", + ) + + _clear_line(handle) + + +def _portion_of_completions(handle, verb, table_in): + """Tab completion for DELETE/UPDATE ... FOR PORTION OF.""" + _check_completion( + handle, + table_in, + r"FOR ", + "complete {verb} F to FOR".format(verb=verb), + ) + + _check_completion(handle, "P\t", r"PORTION ", "complete FOR P to PORTION") + + _check_completion(handle, "O\t", r"OF ", "complete PORTION O to OF") + + _check_completion( + handle, "v\t", r"valid_at ", "complete FOR PORTION OF offers column names" + ) + + _check_completion( + handle, "FR\t", r"FROM ", "complete FOR PORTION OF FR to FROM" + ) + + _clear_query(handle) + + +def _setup_objects(node): + """Create the database objects the completion checks reference.""" + node.safe_psql( + "CREATE TABLE tab1 (c1 int primary key constraint foo not null, c2 text);\n" + "CREATE TABLE mytab123 (f1 int, f2 text);\n" + "CREATE TABLE mytab246 (f1 int, f2 text);\n" + 'CREATE TABLE "mixedName" (f1 int, f2 text);\n' + "CREATE TYPE enum1 AS ENUM ('foo', 'bar', 'baz', 'BLACK');\n" + "CREATE PUBLICATION some_publication;\n" + "CREATE TABLE fpo_test (id int4range, valid_at daterange, name text);\n" + ) + + +def _make_junk_files(): + """Create the tab_comp_dir junk files for filename completion testing.""" + os.makedirs("tab_comp_dir", exist_ok=True) + with open("tab_comp_dir/somefile", "w", encoding="utf-8") as fh: + fh.write("some stuff\n") + with open("tab_comp_dir/afile123", "w", encoding="utf-8") as fh: + fh.write("more stuff\n") + with open("tab_comp_dir/afile456", "w", encoding="utf-8") as 
fh: + fh.write("other stuff\n") + + +def test_010_tab_completion(create_pg, monkeypatch): + """psql readline tab completion responds as expected (skips w/o readline).""" + # Do nothing unless the build is --with-readline. + if os.environ.get("with_readline") != "yes": + pytest.skip("readline is not supported by this build") + # Also, skip if user has set environment variable to command that. This is + # mainly intended to allow working around some of the more broken versions + # of libedit --- some users might find them acceptable even if they won't + # pass these tests. + if os.environ.get("SKIP_READLINE_TESTS"): + pytest.skip("SKIP_READLINE_TESTS is set") + # If we don't have a PTY, forget it (the Perl IO::Pty requirement). + if sys.platform == "win32": + pytest.skip("a PTY is needed to run this test") + + # start a new server + node = create_pg("main") + + # set up a few database objects + _setup_objects(node) + + # In a VPATH build, we'll be started in the source directory, but we want + # to run in the build directory so that we can use relative paths to access + # the tab_comp_dir subdirectory; otherwise the output from filename + # completion tests is too variable. + testdatadir = os.environ.get("TESTDATADIR") + monkeypatch.chdir(testdatadir if testdatadir else tempfile.mkdtemp()) + + # Create some junk files for filename completion testing. + _make_junk_files() + + # Arrange to capture, not discard, the interactive session's history + # output. Put it in the test log directory, so that buildfarm runs capture + # the result for possible debugging purposes. 
+ logdir = os.environ.get("TESTLOGDIR") + if logdir: + historyfile = os.path.join(logdir, "010_psql_history.txt") + else: + historyfile = os.path.join(tempfile.mkdtemp(), "010_psql_history.txt") + + # fire up an interactive psql session and configure it such that each query + # restarts the timer + handle = node.interactive_psql("postgres", history_file=historyfile) + handle.set_query_timer_restart() + + _basic_completions(handle) + _quoted_completions(handle) + _refname_completions(handle) + _filename_completions(handle) + _enum_tz_completions(handle) + _keyword_case_completions(handle) + _guc_var_completions(handle) + + # check tab completion for DELETE ... FOR PORTION OF + _portion_of_completions(handle, "DELETE FROM", "DELETE FROM fpo_test F\t") + # check tab completion for UPDATE ... FOR PORTION OF + _portion_of_completions(handle, "UPDATE", "UPDATE fpo_test F\t") + + # send psql an explicit \q to shut it down, else pty won't close properly + handle.quit() + + # done + node.stop() diff --git a/src/bin/psql/pyt/test_020_cancel.py b/src/bin/psql/pyt/test_020_cancel.py new file mode 100644 index 0000000000000..5745d1e28e939 --- /dev/null +++ b/src/bin/psql/pyt/test_020_cancel.py @@ -0,0 +1,43 @@ +# Copyright (c) 2021-2026, PostgreSQL Global Development Group + +"""Port of src/bin/psql/t/020_cancel.pl. + +Query cancellation by sending SIGINT to a running psql. +""" + +import os +import platform +import re +import signal + +import pytest + +# Sending SIGINT on Windows would terminate the test itself. +pytestmark = pytest.mark.skipif( + platform.system() == "Windows", + reason="sending SIGINT on Windows terminates the test itself", +) + + +def test_cancel(create_pg): + """SIGINT to psql cancels its running statement.""" + node = create_pg("main") + timeout_default = int(os.environ.get("PG_TEST_TIMEOUT_DEFAULT", "180")) + + session = node.background_psql() + + # Send a sleep and wait until the server has registered it. 
+ session.send("select pg_sleep({});\n".format(timeout_default)) + assert node.poll_query_until( + "SELECT (SELECT count(*) FROM pg_stat_activity " + "WHERE query ~ '^select pg_sleep') > 0;" + ), "server registered the sleep" + + # Send the cancel request. + session.signal(signal.SIGINT) + result = session.finish() + + assert result != 0, "query failed as expected" + assert re.search( + r"canceling statement due to user request", session.stderr + ), "query was canceled" diff --git a/src/bin/psql/pyt/test_030_pager.py b/src/bin/psql/pyt/test_030_pager.py new file mode 100644 index 0000000000000..1b7d6d0025473 --- /dev/null +++ b/src/bin/psql/pyt/test_030_pager.py @@ -0,0 +1,74 @@ +# Copyright (c) 2024-2026, PostgreSQL Global Development Group + +"""Port of src/bin/psql/t/030_pager.pl. + +With PSQL_PAGER set to "wc -l" and a fixed 24x80 terminal, psql invokes the +pager exactly when output exceeds the screen: a 23-row result is printed +directly, a 24-row result is paged (wc reports the line count), and expanded +mode and a \\d+ footer also trigger paging with the expected counts. 
+""" + +import re +import shutil +import subprocess +import sys + +import pytest + + +def _wc_l_works(): + if not shutil.which("wc"): + return False + out = subprocess.run( + ["wc", "-l"], + input="foo bar\nbaz\n", + encoding="utf-8", + capture_output=True, + check=False, + ) + return out.returncode == 0 and out.stdout.strip() == "2" + + +@pytest.mark.skipif(sys.platform == "win32", reason="requires a PTY") +def test_030_pager(create_pg, monkeypatch): + """psql invokes the pager only when output exceeds the 24x80 screen.""" + if not _wc_l_works(): + pytest.skip('"wc -l" is needed to run this test') + monkeypatch.setenv("PSQL_PAGER", "wc -l") + node = create_pg("main") + cols = ",\n".join("{} as {}".format(i + 1, chr(ord("a") + i)) for i in range(26)) + node.safe_psql("create view public.view_030_pager as select\n" + cols) + psql = node.interactive_psql("postgres") + psql.set_query_timer_restart() + psql.set_winsize(24, 80) + _do( + psql, + "SELECT 'test' AS t FROM generate_series(1,23);\n", + r"test\r?$", + "execute SELECT query that needs no pagination", + ) + _do( + psql, + "SELECT 'test' AS t FROM generate_series(1,24);\n", + r"24\r?$", + "execute SELECT query that needs pagination", + ) + _do( + psql, + "\\pset expanded\nSELECT generate_series(1,20) as g;\n", + r"39\r?$", + "execute SELECT query that needs pagination in expanded mode", + ) + _do( + psql, + "\\pset tuples_only off\n\\d+ public.view_030_pager\n", + r"55\r?$", + "execute command with footer that needs pagination", + ) + psql.quit() + node.stop() + + +def _do(psql, send, pattern, annotation): + out = psql.query_until(re.compile(pattern, re.M), send) + assert re.search(pattern, out, re.M) and not psql.timed_out, annotation diff --git a/src/bin/scripts/Makefile b/src/bin/scripts/Makefile index e6cd9ef4af57f..88f7ca8d94036 100644 --- a/src/bin/scripts/Makefile +++ b/src/bin/scripts/Makefile @@ -65,6 +65,7 @@ export with_icu check: $(prove_check) + $(pytest_check) installcheck: $(prove_installcheck) 
diff --git a/src/bin/scripts/meson.build b/src/bin/scripts/meson.build index c083ec38099c7..24aba21b57254 100644 --- a/src/bin/scripts/meson.build +++ b/src/bin/scripts/meson.build @@ -83,6 +83,24 @@ tests += { 't/200_connstr.pl', ], }, + 'pytest': { + 'tests': [ + 'pyt/test_010_clusterdb.py', + 'pyt/test_011_clusterdb_all.py', + 'pyt/test_020_createdb.py', + 'pyt/test_040_createuser.py', + 'pyt/test_050_dropdb.py', + 'pyt/test_070_dropuser.py', + 'pyt/test_080_pg_isready.py', + 'pyt/test_090_reindexdb.py', + 'pyt/test_091_reindexdb_all.py', + 'pyt/test_100_vacuumdb.py', + 'pyt/test_101_vacuumdb_all.py', + 'pyt/test_102_vacuumdb_stages.py', + 'pyt/test_200_connstr.py', + ], + 'env': {'with_icu': icu.found() ? 'yes' : 'no'}, + }, } subdir('po', if_found: libintl) diff --git a/src/bin/scripts/pyt/test_010_clusterdb.py b/src/bin/scripts/pyt/test_010_clusterdb.py new file mode 100644 index 0000000000000..7f33202e58bf0 --- /dev/null +++ b/src/bin/scripts/pyt/test_010_clusterdb.py @@ -0,0 +1,35 @@ +# Copyright (c) 2021-2026, PostgreSQL Global Development Group + +"""Port of src/bin/scripts/t/010_clusterdb.pl.""" + + +def test_clusterdb(pg_bin, create_pg): + """clusterdb basics: SQL issued, nonexistent table, specific table, connstr.""" + pg_bin.program_help_ok("clusterdb") + pg_bin.program_version_ok("clusterdb") + pg_bin.program_options_handling_ok("clusterdb") + + node = create_pg("main") + + node.issues_sql_like(["clusterdb"], r"statement: CLUSTER;", "SQL CLUSTER run") + + node.command_fails_like( + ["clusterdb", "--table", "nonexistent"], + r'relation "nonexistent" does not exist', + "fails with nonexistent table", + ) + + node.safe_psql( + "CREATE TABLE test1 (a int); CREATE INDEX test1x ON test1 (a); " + "CLUSTER test1 USING test1x" + ) + node.issues_sql_like( + ["clusterdb", "--table", "test1"], + r"statement: CLUSTER public\.test1;", + "cluster specific table", + ) + + node.command_ok( + ["clusterdb", "--echo", "--verbose", "dbname=template1"], + "clusterdb with 
connection string", + ) diff --git a/src/bin/scripts/pyt/test_011_clusterdb_all.py b/src/bin/scripts/pyt/test_011_clusterdb_all.py new file mode 100644 index 0000000000000..4a063642b7655 --- /dev/null +++ b/src/bin/scripts/pyt/test_011_clusterdb_all.py @@ -0,0 +1,48 @@ +# Copyright (c) 2021-2026, PostgreSQL Global Development Group + +"""Port of src/bin/scripts/t/011_clusterdb_all.pl.""" + + +def test_clusterdb_all(create_pg): + """clusterdb --all clusters every database and skips invalid ones.""" + node = create_pg("main") + + # clusterdb -a is not compatible with -d. This relies on PGDATABASE being + # set, something the pg fixtures do. + node.issues_sql_like( + ["clusterdb", "--all"], + r"(?s)statement: CLUSTER.*statement: CLUSTER", + "cluster all databases", + ) + + node.safe_psql( + "CREATE DATABASE regression_invalid;" + " UPDATE pg_database SET datconnlimit = -2" + " WHERE datname = 'regression_invalid';" + ) + node.command_ok( + ["clusterdb", "--all"], "invalid database not targeted by clusterdb -a" + ) + + # Doesn't quite belong here, but avoids creating an invalid database in + # 010_clusterdb as well. 
+ node.command_fails_like( + ["clusterdb", "--dbname", "regression_invalid"], + r'FATAL: cannot connect to invalid database "regression_invalid"', + "clusterdb cannot target invalid database", + ) + + node.safe_psql( + "CREATE TABLE test1 (a int); CREATE INDEX test1x ON test1 (a); " + "CLUSTER test1 USING test1x" + ) + node.safe_psql( + "CREATE TABLE test1 (a int); CREATE INDEX test1x ON test1 (a); " + "CLUSTER test1 USING test1x", + dbname="template1", + ) + node.issues_sql_like( + ["clusterdb", "--all", "--table", "test1"], + r"(?s)statement: CLUSTER public\.test1", + "cluster specific table in all databases", + ) diff --git a/src/bin/scripts/pyt/test_020_createdb.py b/src/bin/scripts/pyt/test_020_createdb.py new file mode 100644 index 0000000000000..a85a58adad6d4 --- /dev/null +++ b/src/bin/scripts/pyt/test_020_createdb.py @@ -0,0 +1,413 @@ +# Copyright (c) 2021-2026, PostgreSQL Global Development Group + +"""Port of src/bin/scripts/t/020_createdb.pl.""" + +import os +import re + + +def test_createdb(pg_bin, create_pg): + """createdb across encodings, locale providers, strategies, and templates.""" + pg_bin.program_help_ok("createdb") + pg_bin.program_version_ok("createdb") + pg_bin.program_options_handling_ok("createdb") + + node = create_pg("main") + + node.issues_sql_like( + ["createdb", "foobar1"], + r"statement: CREATE DATABASE foobar1", + "SQL CREATE DATABASE run", + ) + node.issues_sql_like( + [ + "createdb", + "--locale", + "C", + "--encoding", + "LATIN1", + "--template", + "template0", + "foobar2", + ], + r"statement: CREATE DATABASE foobar2 ENCODING 'LATIN1'", + "create database with encoding", + ) + + if os.environ.get("with_icu") == "yes": + _test_icu(pg_bin, node, create_pg) + else: + node.command_fails( + [ + "createdb", + "--template", + "template0", + "--locale-provider", + "icu", + "foobar4", + ], + "create database with ICU fails since no ICU support", + ) + + _test_builtin_provider(node) + _test_misc_failures(node) + 
_test_templates_and_strategies(node) + + +def _test_icu(pg_bin, node, create_pg): + # Fails: template0 uses libc provider and has no ICU locale set. + node.command_fails( + [ + "createdb", + "--template", + "template0", + "--encoding", + "UTF8", + "--locale-provider", + "icu", + "foobar4", + ], + "create database with ICU fails without ICU locale specified", + ) + node.issues_sql_like( + [ + "createdb", + "--template", + "template0", + "--encoding", + "UTF8", + "--locale-provider", + "icu", + "--locale", + "C", + "--icu-locale", + "en", + "foobar5", + ], + r"statement: CREATE DATABASE foobar5 .* LOCALE_PROVIDER icu ICU_LOCALE 'en'", + "create database with ICU locale specified", + ) + node.command_fails( + [ + "createdb", + "--template", + "template0", + "--encoding", + "UTF8", + "--locale-provider", + "icu", + "--icu-locale", + "@colNumeric=lower", + "foobarX", + ], + "fails for invalid ICU locale", + ) + node.command_fails_like( + [ + "createdb", + "--template", + "template0", + "--locale-provider", + "icu", + "--encoding", + "SQL_ASCII", + "foobarX", + ], + r'ERROR: encoding "SQL_ASCII" is not supported with ICU provider', + "fails for encoding not supported by ICU", + ) + + # Additional node which uses the icu provider. 
+ node2 = create_pg("icu", extra=["--locale-provider=icu", "--icu-locale=en"]) + node2.command_ok( + [ + "createdb", + "--template", + "template0", + "--locale-provider", + "libc", + "foobar55", + ], + "create database with libc provider from template database with icu provider", + ) + node2.command_ok( + ["createdb", "--template", "template0", "--icu-locale", "en-US", "foobar56"], + "create database with icu locale from template database with icu provider", + ) + node2.command_ok( + [ + "createdb", + "--template", + "template0", + "--locale-provider", + "icu", + "--locale", + "en", + "--lc-collate", + "C", + "--lc-ctype", + "C", + "foobar57", + ], + "create database with locale as ICU locale", + ) + + +def _test_builtin_provider(node): + node.command_fails( + [ + "createdb", + "--template", + "template0", + "--locale-provider", + "builtin", + "tbuiltin1", + ], + 'create database with provider "builtin" fails without --locale', + ) + node.command_ok( + [ + "createdb", + "--template", + "template0", + "--locale-provider", + "builtin", + "--locale", + "C", + "tbuiltin2", + ], + 'create database with provider "builtin" and locale "C"', + ) + node.command_ok( + [ + "createdb", + "--template", + "template0", + "--locale-provider", + "builtin", + "--locale", + "C", + "--lc-collate", + "C", + "tbuiltin3", + ], + 'create database with provider "builtin" and LC_COLLATE=C', + ) + node.command_ok( + [ + "createdb", + "--template", + "template0", + "--locale-provider", + "builtin", + "--locale", + "C", + "--lc-ctype", + "C", + "tbuiltin4", + ], + 'create database with provider "builtin" and LC_CTYPE=C', + ) + node.command_ok( + [ + "createdb", + "--template", + "template0", + "--locale-provider", + "builtin", + "--lc-collate", + "C", + "--lc-ctype", + "C", + "--encoding", + "UTF-8", + "--builtin-locale", + "C.UTF8", + "tbuiltin5", + ], + "create database with --builtin-locale C.UTF-8 and -E UTF-8", + ) + node.command_fails( + [ + "createdb", + "--template", + "template0", + 
"--locale-provider", + "builtin", + "--lc-collate", + "C", + "--lc-ctype", + "C", + "--encoding", + "LATIN1", + "--builtin-locale", + "C.UTF-8", + "tbuiltin6", + ], + "create database with --builtin-locale C.UTF-8 and -E LATIN1", + ) + node.command_fails( + [ + "createdb", + "--template", + "template0", + "--locale-provider", + "builtin", + "--locale", + "C", + "--icu-locale", + "en", + "tbuiltin7", + ], + 'create database with provider "builtin" and ICU_LOCALE="en"', + ) + node.command_fails( + [ + "createdb", + "--template", + "template0", + "--locale-provider", + "builtin", + "--locale", + "C", + "--icu-rules", + '""', + "tbuiltin8", + ], + 'create database with provider "builtin" and ICU_RULES=""', + ) + node.command_fails( + [ + "createdb", + "--template", + "template1", + "--locale-provider", + "builtin", + "--locale", + "C", + "tbuiltin9", + ], + 'create database with provider "builtin" not matching template', + ) + + +def _test_misc_failures(node): + node.command_fails(["createdb", "foobar1"], "fails if database already exists") + node.command_fails( + ["createdb", "--template", "template0", "--locale-provider", "xyz", "foobarX"], + "fails for invalid locale provider", + ) + node.command_fails_like( + ["createdb", "invalid \n dbname"], + r"contains a newline or carriage return character", + "fails if database name contains a newline character in name", + ) + node.command_fails_like( + ["createdb", "invalid \r dbname"], + r"contains a newline or carriage return character", + "fails if database name contains a carriage return character in name", + ) + + # Quote handling with incorrect option values. 
+ node.command_checks_all( + ["createdb", "--encoding", "foo'; SELECT '1", "foobar2"], + 1, + [r"^$"], + [r"""(?s)^createdb: error: "foo'; SELECT '1" is not a valid encoding name"""], + "createdb with incorrect --encoding", + ) + node.command_checks_all( + ["createdb", "--lc-collate", "foo'; SELECT '1", "foobar2"], + 1, + [r"^$"], + [ + r"(?s)^createdb: error: database creation failed: ERROR: " + r"invalid LC_COLLATE locale name" + r"|^createdb: error: database creation failed: ERROR: " + r"new collation \(foo'; SELECT '1\) is incompatible with the " + r"collation of the template database" + ], + "createdb with incorrect --lc-collate", + ) + node.command_checks_all( + ["createdb", "--lc-ctype", "foo'; SELECT '1", "foobar2"], + 1, + [r"^$"], + [ + r"(?s)^createdb: error: database creation failed: ERROR: " + r"invalid LC_CTYPE locale name" + r"|^createdb: error: database creation failed: ERROR: " + r"new LC_CTYPE \(foo'; SELECT '1\) is incompatible with the " + r"LC_CTYPE of the template database" + ], + "createdb with incorrect --lc-ctype", + ) + node.command_checks_all( + ["createdb", "--strategy", "foo", "foobar2"], + 1, + [r"^$"], + [ + r"(?s)^createdb: error: database creation failed: ERROR: " + r'invalid create database strategy "foo"' + ], + "createdb with incorrect --strategy", + ) + + +def _test_templates_and_strategies(node): + # Use of templates with shared dependencies copied from the template. 
+ node.safe_psql( + "CREATE ROLE role_foobar;" + " CREATE TABLE tab_foobar (id int);" + " ALTER TABLE tab_foobar owner to role_foobar;" + " CREATE POLICY pol_foobar ON tab_foobar FOR ALL TO role_foobar;", + dbname="foobar2", + ) + node.issues_sql_like( + ["createdb", "--locale", "C", "--template", "foobar2", "foobar3"], + r"statement: CREATE DATABASE foobar3 TEMPLATE foobar2 LOCALE 'C'", + "create database with template", + ) + stdout = node.safe_psql( + "SELECT pg_describe_object(classid, objid, objsubid) AS obj," + " pg_describe_object(refclassid, refobjid, 0) AS refobj" + " FROM pg_shdepend s JOIN pg_database d ON (d.oid = s.dbid)" + " WHERE d.datname = 'foobar3' ORDER BY obj;", + dbname="foobar3", + ) + assert re.search( + r"^policy pol_foobar on table tab_foobar\|role role_foobar\n" + r"table tab_foobar\|role role_foobar$", + stdout, + ), "shared dependencies copied over to target database" + + # Database creation strategy. + node.issues_sql_like( + ["createdb", "--template", "foobar2", "--strategy", "wal_log", "foobar6"], + r"statement: CREATE DATABASE foobar6 STRATEGY wal_log TEMPLATE foobar2", + "create database with WAL_LOG strategy", + ) + node.issues_sql_like( + ["createdb", "--template", "foobar2", "--strategy", "WAL_LOG", "foobar6s"], + r'statement: CREATE DATABASE foobar6s STRATEGY "WAL_LOG" TEMPLATE foobar2', + "create database with WAL_LOG strategy", + ) + node.issues_sql_like( + ["createdb", "--template", "foobar2", "--strategy", "file_copy", "foobar7"], + r"statement: CREATE DATABASE foobar7 STRATEGY file_copy TEMPLATE foobar2", + "create database with FILE_COPY strategy", + ) + node.issues_sql_like( + ["createdb", "--template", "foobar2", "--strategy", "FILE_COPY", "foobar7s"], + r'statement: CREATE DATABASE foobar7s STRATEGY "FILE_COPY" TEMPLATE foobar2', + "create database with FILE_COPY strategy", + ) + + # Database owned by role_foobar. 
+ node.issues_sql_like( + ["createdb", "--template", "foobar2", "--owner", "role_foobar", "foobar8"], + r"statement: CREATE DATABASE foobar8 OWNER role_foobar TEMPLATE foobar2", + "create database with owner role_foobar", + ) + node.safe_psql("DROP OWNED BY role_foobar;", dbname="foobar2") + node.safe_psql("DROP DATABASE foobar8;", dbname="foobar2") diff --git a/src/bin/scripts/pyt/test_040_createuser.py b/src/bin/scripts/pyt/test_040_createuser.py new file mode 100644 index 0000000000000..8d5c3321174e9 --- /dev/null +++ b/src/bin/scripts/pyt/test_040_createuser.py @@ -0,0 +1,124 @@ +# Copyright (c) 2021-2026, PostgreSQL Global Development Group + +"""Port of src/bin/scripts/t/040_createuser.pl.""" + + +def test_createuser(pg_bin, create_pg): + """createuser emits the expected CREATE ROLE for each option combination.""" + pg_bin.program_help_ok("createuser") + pg_bin.program_version_ok("createuser") + pg_bin.program_options_handling_ok("createuser") + + node = create_pg("main") + + node.issues_sql_like( + ["createuser", "regress_user1"], + r"statement: CREATE ROLE regress_user1 NOSUPERUSER NOCREATEDB " + r"NOCREATEROLE INHERIT LOGIN NOREPLICATION NOBYPASSRLS;", + "SQL CREATE USER run", + ) + node.issues_sql_like( + ["createuser", "--no-login", "regress_role1"], + r"statement: CREATE ROLE regress_role1 NOSUPERUSER NOCREATEDB " + r"NOCREATEROLE INHERIT NOLOGIN NOREPLICATION NOBYPASSRLS;", + "create a non-login role", + ) + node.issues_sql_like( + ["createuser", "--createrole", "regress user2"], + r'statement: CREATE ROLE "regress user2" NOSUPERUSER NOCREATEDB ' + r"CREATEROLE INHERIT LOGIN NOREPLICATION NOBYPASSRLS;", + "create a CREATEROLE user", + ) + node.issues_sql_like( + ["createuser", "--superuser", "regress_user3"], + r"statement: CREATE ROLE regress_user3 SUPERUSER CREATEDB CREATEROLE " + r"INHERIT LOGIN NOREPLICATION NOBYPASSRLS;", + "create a superuser", + ) + node.issues_sql_like( + [ + "createuser", + "--with-admin", + "regress_user1", + "--with-admin", 
+ "regress user2", + "regress user #4", + ], + r'statement: CREATE ROLE "regress user #4" NOSUPERUSER NOCREATEDB ' + r"NOCREATEROLE INHERIT LOGIN NOREPLICATION NOBYPASSRLS " + r'ADMIN regress_user1,"regress user2";', + "add a role as a member with admin option of the newly created role", + ) + node.issues_sql_like( + [ + "createuser", + "REGRESS_USER5", + "--with-member", + "regress_user3", + "--with-member", + "regress user #4", + ], + r'statement: CREATE ROLE "REGRESS_USER5" NOSUPERUSER NOCREATEDB ' + r"NOCREATEROLE INHERIT LOGIN NOREPLICATION NOBYPASSRLS " + r'ROLE regress_user3,"regress user #4";', + "add a role as a member of the newly created role", + ) + node.issues_sql_like( + ["createuser", "--valid-until", "2029 12 31", "regress_user6"], + r"statement: CREATE ROLE regress_user6 NOSUPERUSER NOCREATEDB " + r"NOCREATEROLE INHERIT LOGIN NOREPLICATION NOBYPASSRLS " + r"VALID UNTIL '2029 12 31';", + "create a role with a password expiration date", + ) + node.issues_sql_like( + ["createuser", "--bypassrls", "regress_user7"], + r"statement: CREATE ROLE regress_user7 NOSUPERUSER NOCREATEDB " + r"NOCREATEROLE INHERIT LOGIN NOREPLICATION BYPASSRLS;", + "create a BYPASSRLS role", + ) + node.issues_sql_like( + ["createuser", "--no-bypassrls", "regress_user8"], + r"statement: CREATE ROLE regress_user8 NOSUPERUSER NOCREATEDB " + r"NOCREATEROLE INHERIT LOGIN NOREPLICATION NOBYPASSRLS;", + "create a role without BYPASSRLS", + ) + node.issues_sql_like( + ["createuser", "--with-admin", "regress_user1", "regress_user9"], + r"statement: CREATE ROLE regress_user9 NOSUPERUSER NOCREATEDB " + r"NOCREATEROLE INHERIT LOGIN NOREPLICATION NOBYPASSRLS " + r"ADMIN regress_user1;", + "--with-admin", + ) + node.issues_sql_like( + ["createuser", "--with-member", "regress_user1", "regress_user10"], + r"statement: CREATE ROLE regress_user10 NOSUPERUSER NOCREATEDB " + r"NOCREATEROLE INHERIT LOGIN NOREPLICATION NOBYPASSRLS " + r"ROLE regress_user1;", + "--with-member", + ) + 
node.issues_sql_like( + ["createuser", "--role", "regress_user1", "regress_user11"], + r"statement: CREATE ROLE regress_user11 NOSUPERUSER NOCREATEDB " + r"NOCREATEROLE INHERIT LOGIN NOREPLICATION NOBYPASSRLS " + r"IN ROLE regress_user1;", + "--role", + ) + node.issues_sql_like( + ["createuser", "regress_user12", "--member-of", "regress_user1"], + r"statement: CREATE ROLE regress_user12 NOSUPERUSER NOCREATEDB " + r"NOCREATEROLE INHERIT LOGIN NOREPLICATION NOBYPASSRLS " + r"IN ROLE regress_user1;", + "--member-of", + ) + + node.command_fails(["createuser", "regress_user1"], "fails if role already exists") + node.command_fails( + [ + "createuser", + "regress_user1", + "--with-member", + "regress_user2", + "regress_user3", + ], + "fails for too many non-options", + ) diff --git a/src/bin/scripts/pyt/test_050_dropdb.py b/src/bin/scripts/pyt/test_050_dropdb.py new file mode 100644 index 0000000000000..6ffaaadc4b64c --- /dev/null +++ b/src/bin/scripts/pyt/test_050_dropdb.py @@ -0,0 +1,40 @@ +# Copyright (c) 2021-2026, PostgreSQL Global Development Group + +"""Port of src/bin/scripts/t/050_dropdb.pl.""" + + +def test_dropdb(pg_bin, create_pg): + """dropdb basics, --force, nonexistent, and dropping an invalid database.""" + pg_bin.program_help_ok("dropdb") + pg_bin.program_version_ok("dropdb") + pg_bin.program_options_handling_ok("dropdb") + + node = create_pg("main") + + node.safe_psql("CREATE DATABASE foobar1") + node.issues_sql_like( + ["dropdb", "foobar1"], + r"statement: DROP DATABASE foobar1", + "SQL DROP DATABASE run", + ) + + node.safe_psql("CREATE DATABASE foobar2") + node.issues_sql_like( + ["dropdb", "--force", "foobar2"], + r"statement: DROP DATABASE foobar2 WITH \(FORCE\);", + "SQL DROP DATABASE (FORCE) run", + ) + + node.command_fails_like( + ["dropdb", "nonexistent"], + r'database "nonexistent" does not exist', + "fails with nonexistent database", + ) + + # An invalid database can be dropped with dropdb. 
+ node.safe_psql( + "CREATE DATABASE regression_invalid;" + " UPDATE pg_database SET datconnlimit = -2" + " WHERE datname = 'regression_invalid';" + ) + node.command_ok(["dropdb", "regression_invalid"], "invalid database can be dropped") diff --git a/src/bin/scripts/pyt/test_070_dropuser.py b/src/bin/scripts/pyt/test_070_dropuser.py new file mode 100644 index 0000000000000..777bacb592af9 --- /dev/null +++ b/src/bin/scripts/pyt/test_070_dropuser.py @@ -0,0 +1,25 @@ +# Copyright (c) 2021-2026, PostgreSQL Global Development Group + +"""Port of src/bin/scripts/t/070_dropuser.pl.""" + + +def test_dropuser(pg_bin, create_pg): + """dropuser drops a role and fails for a nonexistent one.""" + pg_bin.program_help_ok("dropuser") + pg_bin.program_version_ok("dropuser") + pg_bin.program_options_handling_ok("dropuser") + + node = create_pg("main") + + node.safe_psql("CREATE ROLE regress_foobar1") + node.issues_sql_like( + ["dropuser", "regress_foobar1"], + r"statement: DROP ROLE regress_foobar1", + "SQL DROP ROLE run", + ) + + node.command_fails_like( + ["dropuser", "regress_nonexistent"], + r'role "regress_nonexistent" does not exist', + "fails with nonexistent user", + ) diff --git a/src/bin/scripts/pyt/test_080_pg_isready.py b/src/bin/scripts/pyt/test_080_pg_isready.py new file mode 100644 index 0000000000000..380e3058f131f --- /dev/null +++ b/src/bin/scripts/pyt/test_080_pg_isready.py @@ -0,0 +1,24 @@ +# Copyright (c) 2021-2026, PostgreSQL Global Development Group + +"""Port of src/bin/scripts/t/080_pg_isready.pl.""" + +import os + + +def test_pg_isready(pg_bin, create_pg): + """pg_isready fails with no server and succeeds once the server is up.""" + pg_bin.program_help_ok("pg_isready") + pg_bin.program_version_ok("pg_isready") + pg_bin.program_options_handling_ok("pg_isready") + + node = create_pg("main", start=False) + + node.command_fails(["pg_isready"], "fails with no server running") + + node.start() + + timeout_default = os.environ.get("PG_TEST_TIMEOUT_DEFAULT", "180") 
+ node.command_ok( + ["pg_isready", "--timeout", timeout_default], + "succeeds with server running", + ) diff --git a/src/bin/scripts/pyt/test_090_reindexdb.py b/src/bin/scripts/pyt/test_090_reindexdb.py new file mode 100644 index 0000000000000..fb78e5c0a9e01 --- /dev/null +++ b/src/bin/scripts/pyt/test_090_reindexdb.py @@ -0,0 +1,302 @@ +# Copyright (c) 2021-2026, PostgreSQL Global Development Group + +"""Port of src/bin/scripts/t/090_reindexdb.pl.""" + +# Save the relfilenodes of a set of indexes and compare them across REINDEX. +_FETCH_TOAST = ( + "SELECT b.oid::regclass, c.oid::regclass::text, c.oid, c.relfilenode" + " FROM pg_class a" + " JOIN pg_class b ON (a.oid = b.reltoastrelid)" + " JOIN pg_index i on (a.oid = i.indrelid)" + " JOIN pg_class c on (i.indexrelid = c.oid)" + " WHERE b.oid IN ('pg_constraint'::regclass, 'test1'::regclass)" +) +_FETCH_INDEX = ( + "SELECT i.indrelid, a.oid::regclass::text, a.oid, a.relfilenode" + " FROM pg_class a" + " JOIN pg_index i ON (i.indexrelid = a.oid)" + " WHERE a.relname IN ('pg_constraint_oid_index', 'test1x')" +) +_SAVE_RELFILENODES = "INSERT INTO index_relfilenodes {};".format( + _FETCH_TOAST +) + "INSERT INTO index_relfilenodes {};".format(_FETCH_INDEX) +_COMPARE_RELFILENODES = ( + "SELECT b.parent::regclass," + " regexp_replace(b.indname::text, '(pg_toast.pg_toast_)\\d+(_index)'," + " '\\1\\2')," + " CASE WHEN a.oid = b.indoid THEN 'OID is unchanged'" + " ELSE 'OID has changed' END," + " CASE WHEN a.relfilenode = b.relfilenode THEN 'relfilenode is unchanged'" + " ELSE 'relfilenode has changed' END" + " FROM index_relfilenodes b" + " JOIN pg_class a ON b.indname::text = a.oid::regclass::text" + " ORDER BY b.parent::text, b.indname::text" +) + + +def test_reindexdb(pg_bin, create_pg, tmp_path, monkeypatch): + """reindexdb across object types, tablespaces, --concurrently and --jobs.""" + pg_bin.program_help_ok("reindexdb") + pg_bin.program_version_ok("reindexdb") + pg_bin.program_options_handling_ok("reindexdb") + + 
node = create_pg("main") + monkeypatch.setenv("PGOPTIONS", "--client-min-messages=WARNING") + + tbspace_path = tmp_path / "regress_reindex_tbspace" + tbspace_path.mkdir() + tbspace_name = "reindex_tbspace" + node.safe_psql( + "CREATE TABLESPACE {} LOCATION '{}';".format(tbspace_name, tbspace_path) + ) + + # Use text as data type to get a toast table. + node.safe_psql("CREATE TABLE test1 (a text); CREATE INDEX test1x ON test1 (a);") + toast_table = node.safe_psql( + "SELECT reltoastrelid::regclass FROM pg_class WHERE oid = 'test1'::regclass;" + ) + toast_index = node.safe_psql( + "SELECT indexrelid::regclass FROM pg_index " + "WHERE indrelid = '{}'::regclass;".format(toast_table) + ) + + node.safe_psql( + "CREATE TABLE index_relfilenodes " + "(parent regclass, indname text, indoid oid, relfilenode oid);" + ) + + node.safe_psql(_SAVE_RELFILENODES) + node.issues_sql_like( + ["reindexdb", "postgres"], + r"statement: REINDEX DATABASE postgres;", + "SQL REINDEX run", + ) + assert node.safe_psql(_COMPARE_RELFILENODES) == ( + "pg_constraint|pg_constraint_oid_index|OID is unchanged|" + "relfilenode is unchanged\n" + "pg_constraint|pg_toast.pg_toast__index|OID is unchanged|" + "relfilenode is unchanged\n" + "test1|pg_toast.pg_toast__index|OID is unchanged|" + "relfilenode has changed\n" + "test1|test1x|OID is unchanged|relfilenode has changed" + ), "relfilenode change after REINDEX DATABASE" + + node.safe_psql("TRUNCATE index_relfilenodes; " + _SAVE_RELFILENODES) + node.issues_sql_like( + ["reindexdb", "--system", "postgres"], + r"statement: REINDEX SYSTEM postgres;", + "reindex system tables", + ) + assert node.safe_psql(_COMPARE_RELFILENODES) == ( + "pg_constraint|pg_constraint_oid_index|OID is unchanged|" + "relfilenode has changed\n" + "pg_constraint|pg_toast.pg_toast__index|OID is unchanged|" + "relfilenode has changed\n" + "test1|pg_toast.pg_toast__index|OID is unchanged|" + "relfilenode is unchanged\n" + "test1|test1x|OID is unchanged|relfilenode is unchanged" + ), 
"relfilenode change after REINDEX SYSTEM" + + node.issues_sql_like( + ["reindexdb", "--table", "test1", "postgres"], + r"statement: REINDEX TABLE public\.test1;", + "reindex specific table", + ) + node.issues_sql_like( + ["reindexdb", "--table", "test1", "--tablespace", tbspace_name, "postgres"], + r"statement: REINDEX \(TABLESPACE {}\) TABLE public\.test1;".format( + tbspace_name + ), + "reindex specific table on tablespace", + ) + node.issues_sql_like( + ["reindexdb", "--index", "test1x", "postgres"], + r"statement: REINDEX INDEX public\.test1x;", + "reindex specific index", + ) + node.issues_sql_like( + ["reindexdb", "--schema", "pg_catalog", "postgres"], + r"statement: REINDEX SCHEMA pg_catalog;", + "reindex specific schema", + ) + node.issues_sql_like( + ["reindexdb", "--verbose", "--table", "test1", "postgres"], + r"statement: REINDEX \(VERBOSE\) TABLE public\.test1;", + "reindex with verbose output", + ) + node.issues_sql_like( + [ + "reindexdb", + "--verbose", + "--table", + "test1", + "--tablespace", + tbspace_name, + "postgres", + ], + r"statement: REINDEX \(VERBOSE, TABLESPACE {}\) TABLE public\.test1;".format( + tbspace_name + ), + "reindex with verbose output and tablespace", + ) + + _test_concurrently(node, tbspace_name, toast_table, toast_index) + _test_connstr_and_parallel(node) + + +def _test_concurrently(node, tbspace_name, toast_table, toast_index): + node.safe_psql("TRUNCATE index_relfilenodes; " + _SAVE_RELFILENODES) + node.issues_sql_like( + ["reindexdb", "--concurrently", "postgres"], + r"statement: REINDEX DATABASE CONCURRENTLY postgres;", + "SQL REINDEX CONCURRENTLY run", + ) + assert node.safe_psql(_COMPARE_RELFILENODES) == ( + "pg_constraint|pg_constraint_oid_index|OID is unchanged|" + "relfilenode is unchanged\n" + "pg_constraint|pg_toast.pg_toast__index|OID is unchanged|" + "relfilenode is unchanged\n" + "test1|pg_toast.pg_toast__index|OID has changed|" + "relfilenode has changed\n" + "test1|test1x|OID has changed|relfilenode has 
changed" + ), "OID change after REINDEX DATABASE CONCURRENTLY" + + node.issues_sql_like( + ["reindexdb", "--concurrently", "--table", "test1", "postgres"], + r"statement: REINDEX TABLE CONCURRENTLY public\.test1;", + "reindex specific table concurrently", + ) + node.issues_sql_like( + ["reindexdb", "--concurrently", "--index", "test1x", "postgres"], + r"statement: REINDEX INDEX CONCURRENTLY public\.test1x;", + "reindex specific index concurrently", + ) + node.issues_sql_like( + ["reindexdb", "--concurrently", "--schema", "public", "postgres"], + r"statement: REINDEX SCHEMA CONCURRENTLY public;", + "reindex specific schema concurrently", + ) + node.command_fails( + ["reindexdb", "--concurrently", "--system", "postgres"], + "reindex system tables concurrently", + ) + node.issues_sql_like( + ["reindexdb", "--concurrently", "--verbose", "--table", "test1", "postgres"], + r"statement: REINDEX \(VERBOSE\) TABLE CONCURRENTLY public\.test1;", + "reindex with verbose output concurrently", + ) + node.issues_sql_like( + [ + "reindexdb", + "--concurrently", + "--verbose", + "--table", + "test1", + "--tablespace", + tbspace_name, + "postgres", + ], + r"statement: REINDEX \(VERBOSE, TABLESPACE {}\) TABLE CONCURRENTLY " + r"public\.test1;".format(tbspace_name), + "reindex concurrently with verbose output and tablespace", + ) + + # REINDEX TABLESPACE on toast indexes and tables fails. 
+ for args, what in ( + (["--table", toast_table], "reindex toast table with tablespace"), + ( + ["--concurrently", "--table", toast_table], + "reindex toast table concurrently with tablespace", + ), + (["--index", toast_index], "reindex toast index with tablespace"), + ( + ["--concurrently", "--index", toast_index], + "reindex toast index concurrently with tablespace", + ), + ): + node.command_checks_all( + ["reindexdb"] + args + ["--tablespace", tbspace_name, "postgres"], + 1, + [], + [r"cannot move system relation"], + what, + ) + + +def _test_connstr_and_parallel(node): + node.command_ok( + ["reindexdb", "--echo", "--table=pg_am", "dbname=template1"], + "reindexdb table with connection string", + ) + node.command_ok( + ["reindexdb", "--echo", "dbname=template1"], + "reindexdb database with connection string", + ) + node.command_ok( + ["reindexdb", "--echo", "--system", "dbname=template1"], + "reindexdb system with connection string", + ) + + node.safe_psql( + "CREATE SCHEMA s1;" + " CREATE TABLE s1.t1(id integer);" + " CREATE INDEX ON s1.t1(id);" + " CREATE INDEX i1 ON s1.t1(id);" + " CREATE SCHEMA s2;" + " CREATE TABLE s2.t2(id integer);" + " CREATE INDEX ON s2.t2(id);" + " CREATE INDEX i2 ON s2.t2(id);" + " CREATE SCHEMA s3;" + ) + + node.command_fails( + ["reindexdb", "--jobs", "2", "--system", "postgres"], + "parallel reindexdb cannot process system catalogs", + ) + node.command_ok( + [ + "reindexdb", + "--jobs", + "2", + "--index", + "s1.i1", + "--index", + "s2.i2", + "--index", + "s1.t1_id_idx", + "--index", + "s2.t2_id_idx", + "postgres", + ], + "parallel reindexdb for indices", + ) + node.issues_sql_like( + ["reindexdb", "--jobs", "2", "--schema", "s1", "--schema", "s2", "postgres"], + r"statement: REINDEX TABLE s1.t1;", + "parallel reindexdb for schemas does a per-table REINDEX", + ) + node.command_ok( + ["reindexdb", "--jobs", "2", "--schema", "s3"], + "parallel reindexdb with empty schema", + ) + node.command_ok( + ["reindexdb", "--jobs", "2", 
"--concurrently", "--dbname", "postgres"], + "parallel reindexdb on database, concurrently", + ) + + # Combinations of objects. + node.issues_sql_like( + ["reindexdb", "--system", "--table", "test1", "postgres"], + r"statement: REINDEX SYSTEM postgres;", + "specify both --system and --table", + ) + node.issues_sql_like( + ["reindexdb", "--system", "--index", "test1x", "postgres"], + r"statement: REINDEX INDEX public.test1x;", + "specify both --system and --index", + ) + node.issues_sql_like( + ["reindexdb", "--system", "--schema", "pg_catalog", "postgres"], + r"statement: REINDEX SCHEMA pg_catalog;", + "specify both --system and --schema", + ) diff --git a/src/bin/scripts/pyt/test_091_reindexdb_all.py b/src/bin/scripts/pyt/test_091_reindexdb_all.py new file mode 100644 index 0000000000000..54404a2d9de51 --- /dev/null +++ b/src/bin/scripts/pyt/test_091_reindexdb_all.py @@ -0,0 +1,56 @@ +# Copyright (c) 2021-2026, PostgreSQL Global Development Group + +"""Port of src/bin/scripts/t/091_reindexdb_all.pl.""" + + +def test_reindexdb_all(create_pg, monkeypatch): + """reindexdb --all variants and handling of an invalid database.""" + node = create_pg("main") + + monkeypatch.setenv("PGOPTIONS", "--client-min-messages=WARNING") + + node.safe_psql("CREATE TABLE test1 (a int); CREATE INDEX test1x ON test1 (a);") + node.safe_psql( + "CREATE TABLE test1 (a int); CREATE INDEX test1x ON test1 (a);", + dbname="template1", + ) + node.issues_sql_like( + ["reindexdb", "--all"], + r"(?s)statement: REINDEX.*statement: REINDEX", + "reindex all databases", + ) + node.issues_sql_like( + ["reindexdb", "--all", "--system"], + r"(?s)statement: REINDEX SYSTEM postgres", + "reindex system catalogs in all databases", + ) + node.issues_sql_like( + ["reindexdb", "--all", "--schema", "public"], + r"(?s)statement: REINDEX SCHEMA public", + "reindex schema in all databases", + ) + node.issues_sql_like( + ["reindexdb", "--all", "--index", "test1x"], + r"(?s)statement: REINDEX INDEX public\.test1x", + 
"reindex index in all databases", + ) + node.issues_sql_like( + ["reindexdb", "--all", "--table", "test1"], + r"(?s)statement: REINDEX TABLE public\.test1", + "reindex table in all databases", + ) + + node.safe_psql( + "CREATE DATABASE regression_invalid;" + " UPDATE pg_database SET datconnlimit = -2" + " WHERE datname = 'regression_invalid';" + ) + node.command_ok( + ["reindexdb", "--all"], "invalid database not targeted by reindexdb --all" + ) + + node.command_fails_like( + ["reindexdb", "--dbname", "regression_invalid"], + r'FATAL: cannot connect to invalid database "regression_invalid"', + "reindexdb cannot target invalid database", + ) diff --git a/src/bin/scripts/pyt/test_100_vacuumdb.py b/src/bin/scripts/pyt/test_100_vacuumdb.py new file mode 100644 index 0000000000000..8fe5d2997ef21 --- /dev/null +++ b/src/bin/scripts/pyt/test_100_vacuumdb.py @@ -0,0 +1,460 @@ +# Copyright (c) 2021-2026, PostgreSQL Global Development Group + +"""Port of src/bin/scripts/t/100_vacuumdb.pl.""" + + +def test_vacuumdb(pg_bin, create_pg): + """vacuumdb option handling, column lists, schemas, and --missing-stats-only.""" + pg_bin.program_help_ok("vacuumdb") + pg_bin.program_version_ok("vacuumdb") + pg_bin.program_options_handling_ok("vacuumdb") + + node = create_pg("main") + + _test_basic_options(node) + _test_quoting_and_columns(node) + _test_schema_options(node) + _test_missing_stats_only(node) + _test_partitioned_stats(node) + + +def _test_basic_options(node): + node.issues_sql_like( + ["vacuumdb", "postgres"], r"statement: VACUUM.*;", "SQL VACUUM run" + ) + node.issues_sql_like( + ["vacuumdb", "-f", "postgres"], + r"statement: VACUUM \(SKIP_DATABASE_STATS, FULL\).*;", + "vacuumdb -f", + ) + node.issues_sql_like( + ["vacuumdb", "-F", "postgres"], + r"statement: VACUUM \(SKIP_DATABASE_STATS, FREEZE\).*;", + "vacuumdb -F", + ) + node.issues_sql_like( + ["vacuumdb", "-zj2", "postgres"], + r"statement: VACUUM \(SKIP_DATABASE_STATS, ANALYZE\).*;", + "vacuumdb -zj2", + ) + 
node.issues_sql_like( + ["vacuumdb", "-Z", "postgres"], r"statement: ANALYZE.*;", "vacuumdb -Z" + ) + node.issues_sql_like( + ["vacuumdb", "--disable-page-skipping", "postgres"], + r"statement: VACUUM \(DISABLE_PAGE_SKIPPING, SKIP_DATABASE_STATS\).*;", + "vacuumdb --disable-page-skipping", + ) + node.issues_sql_like( + ["vacuumdb", "--skip-locked", "postgres"], + r"statement: VACUUM \(SKIP_DATABASE_STATS, SKIP_LOCKED\).*;", + "vacuumdb --skip-locked", + ) + node.issues_sql_like( + ["vacuumdb", "--skip-locked", "--analyze-only", "postgres"], + r"statement: ANALYZE \(SKIP_LOCKED\).*;", + "vacuumdb --skip-locked --analyze-only", + ) + node.command_fails( + ["vacuumdb", "--analyze-only", "--disable-page-skipping", "postgres"], + "--analyze-only and --disable-page-skipping specified together", + ) + node.issues_sql_like( + ["vacuumdb", "--no-index-cleanup", "postgres"], + r"statement: VACUUM \(INDEX_CLEANUP FALSE, SKIP_DATABASE_STATS\).*;", + "vacuumdb --no-index-cleanup", + ) + node.command_fails( + ["vacuumdb", "--analyze-only", "--no-index-cleanup", "postgres"], + "--analyze-only and --no-index-cleanup specified together", + ) + node.issues_sql_like( + ["vacuumdb", "--no-truncate", "postgres"], + r"statement: VACUUM \(TRUNCATE FALSE, SKIP_DATABASE_STATS\).*;", + "vacuumdb --no-truncate", + ) + node.command_fails( + ["vacuumdb", "--analyze-only", "--no-truncate", "postgres"], + "--analyze-only and --no-truncate specified together", + ) + node.issues_sql_like( + ["vacuumdb", "--no-process-main", "postgres"], + r"statement: VACUUM \(PROCESS_MAIN FALSE, SKIP_DATABASE_STATS\).*;", + "vacuumdb --no-process-main", + ) + node.command_fails( + ["vacuumdb", "--analyze-only", "--no-process-main", "postgres"], + "--analyze-only and --no-process-main specified together", + ) + node.issues_sql_like( + ["vacuumdb", "--no-process-toast", "postgres"], + r"statement: VACUUM \(PROCESS_TOAST FALSE, SKIP_DATABASE_STATS\).*;", + "vacuumdb --no-process-toast", + ) + node.command_fails( + 
["vacuumdb", "--analyze-only", "--no-process-toast", "postgres"], + "--analyze-only and --no-process-toast specified together", + ) + node.issues_sql_like( + ["vacuumdb", "--parallel", "2", "postgres"], + r"statement: VACUUM \(SKIP_DATABASE_STATS, PARALLEL 2\).*;", + "vacuumdb -P 2", + ) + node.issues_sql_like( + ["vacuumdb", "--parallel", "0", "postgres"], + r"statement: VACUUM \(SKIP_DATABASE_STATS, PARALLEL 0\).*;", + "vacuumdb -P 0", + ) + node.command_ok( + ["vacuumdb", "-Z", "--table=pg_am", "dbname=template1"], + "vacuumdb with connection string", + ) + + +def _test_quoting_and_columns(node): + node.command_fails( + ["vacuumdb", "-Zt", "pg_am;ABORT", "postgres"], + 'trailing command in "-t", without COLUMNS', + ) + # Unwanted; better if it failed. + node.command_ok( + ["vacuumdb", "-Zt", "pg_am(amname);ABORT", "postgres"], + 'trailing command in "-t", with COLUMNS', + ) + + node.safe_psql( + 'CREATE TABLE "need""q(uot" (")x" text);' + " CREATE TABLE vactable (a int, b int);" + " CREATE VIEW vacview AS SELECT 1 as a;" + " CREATE FUNCTION f0(int) RETURNS int LANGUAGE SQL AS 'SELECT $1 * $1';" + " CREATE FUNCTION f1(int) RETURNS int LANGUAGE SQL AS 'SELECT f0($1)';" + " CREATE TABLE funcidx (x int);" + " INSERT INTO funcidx VALUES (0),(1),(2),(3);" + ' CREATE SCHEMA "Foo";' + ' CREATE TABLE "Foo".bar(id int);' + ' CREATE SCHEMA "Bar";' + ' CREATE TABLE "Bar".baz(id int);' + ) + node.command_ok( + ["vacuumdb", "-Z", '--table="need""q(uot"(")x")', "postgres"], + "column list", + ) + + node.command_fails( + ["vacuumdb", "--analyze", "--table", "vactable(c)", "postgres"], + "incorrect column name with ANALYZE", + ) + node.command_fails( + ["vacuumdb", "--parallel", "-1", "postgres"], "negative parallel degree" + ) + node.issues_sql_like( + ["vacuumdb", "--analyze", "--table", "vactable(a, b)", "postgres"], + r"statement: VACUUM \(SKIP_DATABASE_STATS, ANALYZE\) public.vactable\(a, b\);", + "vacuumdb --analyze with complete column list", + ) + node.issues_sql_like( + 
["vacuumdb", "--analyze-only", "--table", "vactable(b)", "postgres"],
+        r"statement: ANALYZE public.vactable\(b\);",
+        "vacuumdb --analyze-only with partial column list",
+    )
+    node.command_checks_all(
+        ["vacuumdb", "--analyze", "--table", "vacview", "postgres"],
+        0,
+        [r'^.*vacuuming database "postgres"'],
+        [r"(?s)^WARNING.*cannot vacuum non-tables or special system tables"],
+        "vacuumdb with view",
+    )
+    node.command_fails(
+        ["vacuumdb", "--table", "vactable", "--min-mxid-age", "0", "postgres"],
+        "vacuumdb --min-mxid-age with incorrect value",
+    )
+    node.command_fails(
+        ["vacuumdb", "--table", "vactable", "--min-xid-age", "0", "postgres"],
+        "vacuumdb --min-xid-age with incorrect value",
+    )
+    node.issues_sql_like(
+        ["vacuumdb", "--table", "vactable", "--min-mxid-age", "2147483000", "postgres"],
+        r"GREATEST.*relminmxid.*2147483000",
+        "vacuumdb --table --min-mxid-age",
+    )
+    node.issues_sql_like(
+        ["vacuumdb", "--min-xid-age", "2147483001", "postgres"],
+        r"GREATEST.*relfrozenxid.*2147483001",
+        "vacuumdb --table --min-xid-age",
+    )
+
+
+def _test_schema_options(node):
+    """Check vacuumdb --schema/--exclude-schema selection and conflicting options."""
+    node.issues_sql_like(
+        ["vacuumdb", "--schema", '"Foo"', "postgres"],
+        r'VACUUM \(SKIP_DATABASE_STATS\) "Foo".bar',
+        "vacuumdb --schema",
+    )
+    node.issues_sql_unlike(
+        ["vacuumdb", "--schema", '"Foo"', "postgres", "--dry-run"],
+        r'VACUUM \(SKIP_DATABASE_STATS\) "Foo".bar',
+        "vacuumdb --dry-run",
+    )
+    node.issues_sql_like(
+        ["vacuumdb", "--schema", '"Foo"', "--schema", '"Bar"', "postgres"],
+        r'(?s)VACUUM \(SKIP_DATABASE_STATS\) "Foo".bar'
+        r'.*VACUUM \(SKIP_DATABASE_STATS\) "Bar".baz',
+        "vacuumdb multiple --schema switches",
+    )
+    node.issues_sql_unlike(  # absence is asserted directly, not via a lookahead
+        ["vacuumdb", "--exclude-schema", '"Foo"', "postgres"],
+        r'VACUUM \(SKIP_DATABASE_STATS\) "Foo".bar',
+        "vacuumdb --exclude-schema",
+    )
+    node.issues_sql_unlike(
+        [
+            "vacuumdb",
+            "--exclude-schema",
+            '"Foo"',
+            "--exclude-schema",
+            '"Bar"',
+            "postgres",
+        ],
+        r'VACUUM \(SKIP_DATABASE_STATS\) (?:"Foo".bar|"Bar".baz)',  # neither schema
+        "vacuumdb multiple --exclude-schema switches",
+    )
+    node.command_fails_like(
+        [
+            "vacuumdb",
+            "--exclude-schema",
+            "pg_catalog",
+            "--table",
+            "pg_class",
+            "postgres",
+        ],
+        r"cannot vacuum specific table\(s\) and exclude schema\(s\) at the same time",
+        "cannot use options --exclude-schema and ---table at the same time",
+    )
+    node.command_fails_like(
+        ["vacuumdb", "--schema", "pg_catalog", "--table", "pg_class", "postgres"],
+        r"cannot vacuum all tables in schema\(s\) and specific table\(s\) at the "
+        r"same time",
+        "cannot use options --schema and ---table at the same time",
+    )
+    node.command_fails_like(
+        ["vacuumdb", "--schema", "pg_catalog", "--exclude-schema", '"Foo"', "postgres"],
+        r"cannot vacuum all tables in schema\(s\) and exclude schema\(s\) at the "
+        r"same time",
+        "cannot use options --schema and --exclude-schema at the same time",
+    )
+    node.issues_sql_unlike(  # a "(?:(?!X).)*" pattern also matches the empty string
+        ["vacuumdb", "--all", "--exclude-schema", "pg_catalog"],
+        r"VACUUM \(SKIP_DATABASE_STATS\) pg_catalog.pg_class",
+        "vacuumdb --all --exclude-schema",
+    )
+    node.issues_sql_like(
+        ["vacuumdb", "--all", "--schema", "pg_catalog"],
+        r"VACUUM \(SKIP_DATABASE_STATS\) pg_catalog.pg_class",
+        "vacuumdb --all ---schema",
+    )
+    node.issues_sql_like(
+        ["vacuumdb", "--all", "--table", "pg_class"],
+        r"VACUUM \(SKIP_DATABASE_STATS\) pg_catalog.pg_class",
+        "vacuumdb --all --table",
+    )
+    node.command_fails_like(
+        ["vacuumdb", "--all", "-d", "postgres"],
+        r"cannot vacuum all databases and a specific one at the same time",
+        "cannot use options --all and --dbname at the same time",
+    )
+    node.command_fails_like(
+        ["vacuumdb", "--all", "postgres"],
+        r"cannot vacuum all databases and a specific one at the same time",
+        "cannot use option --all and a dbname as argument at the same time",
+    )
+
+
+def _test_missing_stats_only(node):
+    node.safe_psql(
+        "CREATE TABLE regression_vacuumdb_test AS "
+        "select
generate_series(1, 10) a, generate_series(2, 11) b;" + " ALTER TABLE regression_vacuumdb_test " + "ADD COLUMN c INT GENERATED ALWAYS AS (a + b);" + ) + node.issues_sql_unlike( + [ + "vacuumdb", + "--analyze-only", + "--dry-run", + "--missing-stats-only", + "-t", + "regression_vacuumdb_test", + "postgres", + ], + r"(?s)statement: ANALYZE", + "--missing-stats-only --dry-run", + ) + node.issues_sql_like( + [ + "vacuumdb", + "--analyze-only", + "--missing-stats-only", + "-t", + "regression_vacuumdb_test", + "postgres", + ], + r"(?s)statement: ANALYZE", + "--missing-stats-only with missing stats", + ) + node.issues_sql_unlike( + [ + "vacuumdb", + "--analyze-only", + "--missing-stats-only", + "-t", + "regression_vacuumdb_test", + "postgres", + ], + r"(?s)statement: ANALYZE", + "--missing-stats-only with no missing stats", + ) + + node.safe_psql( + "CREATE INDEX regression_vacuumdb_test_idx " + "ON regression_vacuumdb_test (mod(a, 2));" + ) + node.issues_sql_like( + [ + "vacuumdb", + "--analyze-in-stages", + "--missing-stats-only", + "-t", + "regression_vacuumdb_test", + "postgres", + ], + r"(?s)statement: ANALYZE", + "--missing-stats-only with missing index expression stats", + ) + node.issues_sql_unlike( + [ + "vacuumdb", + "--analyze-in-stages", + "--missing-stats-only", + "-t", + "regression_vacuumdb_test", + "postgres", + ], + r"(?s)statement: ANALYZE", + "--missing-stats-only with no missing index expression stats", + ) + + node.safe_psql( + "CREATE STATISTICS regression_vacuumdb_test_stat " + "ON a, b FROM regression_vacuumdb_test;" + ) + node.issues_sql_like( + [ + "vacuumdb", + "--analyze-only", + "--missing-stats-only", + "-t", + "regression_vacuumdb_test", + "postgres", + ], + r"(?s)statement: ANALYZE", + "--missing-stats-only with missing extended stats", + ) + node.issues_sql_unlike( + [ + "vacuumdb", + "--analyze-only", + "--missing-stats-only", + "-t", + "regression_vacuumdb_test", + "postgres", + ], + r"(?s)statement: ANALYZE", + "--missing-stats-only with 
no missing extended stats", + ) + + node.safe_psql( + "CREATE TABLE regression_vacuumdb_child (a INT) " + "INHERITS (regression_vacuumdb_test);\n" + "INSERT INTO regression_vacuumdb_child VALUES (1, 2);\n" + "ANALYZE regression_vacuumdb_child;\n" + ) + node.issues_sql_like( + [ + "vacuumdb", + "--analyze-in-stages", + "--missing-stats-only", + "-t", + "regression_vacuumdb_test", + "postgres", + ], + r"(?s)statement: ANALYZE", + "--missing-stats-only with missing inherited stats", + ) + node.issues_sql_unlike( + [ + "vacuumdb", + "--analyze-in-stages", + "--missing-stats-only", + "-t", + "regression_vacuumdb_test", + "postgres", + ], + r"(?s)statement: ANALYZE", + "--missing-stats-only with no missing inherited stats", + ) + + +def _test_partitioned_stats(node): + node.safe_psql( + "CREATE TABLE regression_vacuumdb_parted (a INT) PARTITION BY LIST (a);\n" + "CREATE TABLE regression_vacuumdb_part1 PARTITION OF " + "regression_vacuumdb_parted FOR VALUES IN (1);\n" + "INSERT INTO regression_vacuumdb_parted VALUES (1);\n" + "ANALYZE regression_vacuumdb_part1;\n" + ) + node.issues_sql_like( + [ + "vacuumdb", + "--analyze-only", + "--missing-stats-only", + "-t", + "regression_vacuumdb_parted", + "postgres", + ], + r"(?s)statement: ANALYZE", + "--missing-stats-only with missing partition stats", + ) + node.issues_sql_unlike( + [ + "vacuumdb", + "--analyze-only", + "--missing-stats-only", + "-t", + "regression_vacuumdb_parted", + "postgres", + ], + r"(?s)statement: ANALYZE", + "--missing-stats-only with no missing partition stats", + ) + + node.safe_psql( + "CREATE TABLE parent_table (a INT) PARTITION BY LIST (a);\n" + "CREATE TABLE child_table PARTITION OF parent_table FOR VALUES IN (1);\n" + "INSERT INTO parent_table VALUES (1);\n" + ) + node.issues_sql_like( + ["vacuumdb", "--analyze-only", "postgres"], + r"(?s)statement: ANALYZE public.parent_table", + "--analyze-only updates statistics for partitioned tables", + ) + node.issues_sql_like( + ["vacuumdb", 
"--analyze-in-stages", "postgres"], + r"(?s)statement: ANALYZE public.parent_table", + "--analyze-in-stages updates statistics for partitioned tables", + ) + node.issues_sql_unlike( + ["vacuumdb", "--analyze-only", "postgres"], + r"(?s)statement: VACUUM", + "--analyze-only does not run vacuum", + ) diff --git a/src/bin/scripts/pyt/test_101_vacuumdb_all.py b/src/bin/scripts/pyt/test_101_vacuumdb_all.py new file mode 100644 index 0000000000000..774f2c0f1dab9 --- /dev/null +++ b/src/bin/scripts/pyt/test_101_vacuumdb_all.py @@ -0,0 +1,29 @@ +# Copyright (c) 2021-2026, PostgreSQL Global Development Group + +"""Port of src/bin/scripts/t/101_vacuumdb_all.pl.""" + + +def test_vacuumdb_all(create_pg): + """vacuumdb --all vacuums every database and skips invalid ones.""" + node = create_pg("main") + + node.issues_sql_like( + ["vacuumdb", "--all"], + r"(?s)statement: VACUUM.*statement: VACUUM", + "vacuum all databases", + ) + + node.safe_psql( + "CREATE DATABASE regression_invalid;" + " UPDATE pg_database SET datconnlimit = -2" + " WHERE datname = 'regression_invalid';" + ) + node.command_ok( + ["vacuumdb", "--all"], "invalid database not targeted by vacuumdb -a" + ) + + node.command_fails_like( + ["vacuumdb", "--dbname", "regression_invalid"], + r'FATAL: cannot connect to invalid database "regression_invalid"', + "vacuumdb cannot target invalid database", + ) diff --git a/src/bin/scripts/pyt/test_102_vacuumdb_stages.py b/src/bin/scripts/pyt/test_102_vacuumdb_stages.py new file mode 100644 index 0000000000000..16e0ebe92de11 --- /dev/null +++ b/src/bin/scripts/pyt/test_102_vacuumdb_stages.py @@ -0,0 +1,46 @@ +# Copyright (c) 2021-2026, PostgreSQL Global Development Group + +"""Port of src/bin/scripts/t/102_vacuumdb_stages.pl.""" + +_STAGES_ONE_DB = ( + r"(?s)" + r"statement: SET default_statistics_target=1; SET vacuum_cost_delay=0;" + r".*statement: ANALYZE" + r".*statement: SET default_statistics_target=10; RESET vacuum_cost_delay;" + r".*statement: ANALYZE" + r".*statement: 
RESET default_statistics_target;" + r".*statement: ANALYZE" +) + +_STAGES_ALL_DB = ( + r"(?s)" + r"statement: SET default_statistics_target=1; SET vacuum_cost_delay=0;" + r".*statement: ANALYZE" + r".*statement: SET default_statistics_target=1; SET vacuum_cost_delay=0;" + r".*statement: ANALYZE" + r".*statement: SET default_statistics_target=10; RESET vacuum_cost_delay;" + r".*statement: ANALYZE" + r".*statement: SET default_statistics_target=10; RESET vacuum_cost_delay;" + r".*statement: ANALYZE" + r".*statement: RESET default_statistics_target;" + r".*statement: ANALYZE" + r".*statement: RESET default_statistics_target;" + r".*statement: ANALYZE" +) + + +def test_vacuumdb_stages(create_pg): + """vacuumdb --analyze-in-stages issues the staged ANALYZE sequence.""" + node = create_pg("main") + + node.issues_sql_like( + ["vacuumdb", "--analyze-in-stages", "postgres"], + _STAGES_ONE_DB, + "analyze three times", + ) + + node.issues_sql_like( + ["vacuumdb", "--analyze-in-stages", "--all"], + _STAGES_ALL_DB, + "analyze more than one database in stages", + ) diff --git a/src/bin/scripts/pyt/test_200_connstr.py b/src/bin/scripts/pyt/test_200_connstr.py new file mode 100644 index 0000000000000..3f09acb89d888 --- /dev/null +++ b/src/bin/scripts/pyt/test_200_connstr.py @@ -0,0 +1,46 @@ +# Copyright (c) 2021-2026, PostgreSQL Global Development Group + +"""Port of src/bin/scripts/t/200_connstr.pl. + +Checks connection-string / database-name handling in the client utilities by +creating databases whose names span the LATIN1 byte range (including bytes that +are not valid UTF-8) and running the --all options over them. +""" + + +def _generate_ascii_bytes(from_char, to_char): + """Return the bytes from_char..to_char inclusive (cf. generate_ascii_string).""" + return bytes(range(from_char, to_char + 1)) + + +def test_connstr(create_pg, monkeypatch): + """vacuumdb/reindexdb/clusterdb --all cope with unusual database names.""" + # Use byte sequences that aren't valid UTF-8. 
LATIN1 accepts any byte. + monkeypatch.setenv("LC_ALL", "C") + monkeypatch.setenv("PGCLIENTENCODING", "LATIN1") + + # Database names covering the range of LATIN1 characters. + dbname1 = _generate_ascii_bytes(1, 63) # contains '=' + dbname2 = _generate_ascii_bytes(67, 129) # skip 64-66 to keep length to 62 + dbname3 = _generate_ascii_bytes(130, 192) + dbname4 = _generate_ascii_bytes(193, 255) + + node = create_pg("main", extra=["--locale=C", "--encoding=LATIN1"]) + + for dbname in (dbname1, dbname2, dbname3, dbname4, b"CamelCase"): + # Like run_log: run and ignore the result (some names are rejected, + # e.g. those containing a newline). + node.bin.result([b"createdb", dbname]) + + node.command_ok( + ["vacuumdb", "--all", "--echo", "--analyze-only"], + "vacuumdb --all with unusual database names", + ) + node.command_ok( + ["reindexdb", "--all", "--echo"], + "reindexdb --all with unusual database names", + ) + node.command_ok( + ["clusterdb", "--all", "--echo", "--verbose"], + "clusterdb --all with unusual database names", + ) diff --git a/src/tools/pg_bsd_indent/meson.build b/src/tools/pg_bsd_indent/meson.build index 3d292e8febb35..16c657f0a7229 100644 --- a/src/tools/pg_bsd_indent/meson.build +++ b/src/tools/pg_bsd_indent/meson.build @@ -38,4 +38,9 @@ tests += { 't/001_pg_bsd_indent.pl', ], }, + 'pytest': { + 'tests': [ + 'pyt/test_001_pg_bsd_indent.py', + ], + }, } diff --git a/src/tools/pg_bsd_indent/pyt/test_001_pg_bsd_indent.py b/src/tools/pg_bsd_indent/pyt/test_001_pg_bsd_indent.py new file mode 100644 index 0000000000000..1d56b1a2db920 --- /dev/null +++ b/src/tools/pg_bsd_indent/pyt/test_001_pg_bsd_indent.py @@ -0,0 +1,59 @@ +# Copyright (c) 2017-2026, PostgreSQL Global Development Group + +"""Port of src/tools/pg_bsd_indent/t/001_pg_bsd_indent.pl. 
+ +Runs pg_bsd_indent over each *.0 fixture in the tests/ directory with its +matching *.pro profile and checks the formatted output matches the recorded +*.0.stdout, accumulating any differences in a test.diffs file. Also checks +--version. +""" + +import glob +import os +import shutil +import subprocess +import sys +import tempfile + + +def test_001_pg_bsd_indent(pg_bin): + """pg_bsd_indent formats each fixture to its recorded expected output.""" + src_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) + workdir = tempfile.mkdtemp(prefix="bsdindent_") + pg_bin.command_ok(["pg_bsd_indent", "--version"], "pg_bsd_indent --version") + diffopts = ["-U3"] + if sys.platform == "win32": + diffopts.append("--strip-trailing-cr") + for listfile in glob.glob(os.path.join(src_dir, "tests", "*.list")): + shutil.copy(listfile, workdir) + # pg_bsd_indent resolves the typedef *.list files (named in each *.pro) from + # the current directory, so run from workdir as the Perl chdir's to tmp_check. 
+ prev_cwd = os.getcwd() + os.chdir(workdir) + try: + _run_fixtures(pg_bin, src_dir, diffopts) + finally: + os.chdir(prev_cwd) + + +def _run_fixtures(pg_bin, src_dir, diffopts): + diffs_file = "test.diffs" + for test_src in sorted(glob.glob(os.path.join(src_dir, "tests", "*.0"))): + test = os.path.basename(test_src)[:-2] + out = test + ".out" + pg_bin.command_ok( + [ + "pg_bsd_indent", + test_src, + out, + "-P{}".format(os.path.join(src_dir, "tests", test + ".pro")), + ], + "pg_bsd_indent succeeds on {}".format(test), + ) + with open(diffs_file, "a", encoding="utf-8") as fh: + rc = subprocess.run( + ["diff", *diffopts, test_src + ".stdout", out], + stdout=fh, + check=False, + ).returncode + assert rc == 0, "pg_bsd_indent output matches for {}".format(test) From 8554d3b1aa6dc0c72bc4bd6355168fe4a963a83d Mon Sep 17 00:00:00 2001 From: Greg Burd Date: Sun, 14 Jun 2026 11:16:32 -0400 Subject: [PATCH 11/36] pytest: port backup and rewind TAP suites Port pg_basebackup, pg_verifybackup, pg_combinebackup, and pg_rewind to pytest, including the large pg_basebackup 010/040 matrices and the stateful RewindTest scenarios. 
Co-authored-by: Greg Burd --- src/bin/pg_basebackup/meson.build | 13 + .../pyt/test_010_pg_basebackup.py | 1164 +++++++++++++++++ .../pyt/test_011_in_place_tablespace.py | 40 + .../pyt/test_020_pg_receivewal.py | 289 ++++ .../pyt/test_030_pg_recvlogical.py | 202 +++ .../pyt/test_040_pg_createsubscriber.py | 750 +++++++++++ src/bin/pg_combinebackup/meson.build | 15 + .../pg_combinebackup/pyt/test_001_basic.py | 26 + .../pyt/test_002_compare_backups.py | 198 +++ .../pg_combinebackup/pyt/test_003_timeline.py | 87 ++ .../pg_combinebackup/pyt/test_004_manifest.py | 65 + .../pyt/test_005_integrity.py | 166 +++ .../pyt/test_006_db_file_copy.py | 60 + .../pyt/test_007_wal_level_minimal.py | 52 + .../pg_combinebackup/pyt/test_008_promote.py | 61 + .../pyt/test_009_no_full_file.py | 62 + .../pg_combinebackup/pyt/test_010_hardlink.py | 95 ++ .../pyt/test_011_ib_truncation.py | 66 + src/bin/pg_rewind/meson.build | 15 + src/bin/pg_rewind/pyt/test_001_basic.py | 120 ++ src/bin/pg_rewind/pyt/test_002_databases.py | 52 + src/bin/pg_rewind/pyt/test_003_extrafiles.py | 88 ++ .../pg_rewind/pyt/test_004_pg_xlog_symlink.py | 46 + .../pg_rewind/pyt/test_005_same_timeline.py | 17 + src/bin/pg_rewind/pyt/test_006_options.py | 57 + .../pg_rewind/pyt/test_007_standby_source.py | 82 ++ .../pyt/test_008_min_recovery_point.py | 74 ++ .../pg_rewind/pyt/test_009_growing_files.py | 60 + .../pyt/test_010_keep_recycled_wals.py | 48 + src/bin/pg_rewind/pyt/test_011_wal_copy.py | 84 ++ src/bin/pg_verifybackup/meson.build | 14 + src/bin/pg_verifybackup/pyt/test_001_basic.py | 43 + .../pg_verifybackup/pyt/test_002_algorithm.py | 69 + .../pyt/test_003_corruption.py | 304 +++++ .../pg_verifybackup/pyt/test_004_options.py | 113 ++ .../pyt/test_005_bad_manifest.py | 175 +++ .../pg_verifybackup/pyt/test_006_encoding.py | 38 + src/bin/pg_verifybackup/pyt/test_007_wal.py | 92 ++ src/bin/pg_verifybackup/pyt/test_008_untar.py | 103 ++ .../pg_verifybackup/pyt/test_009_extract.py | 73 ++ 
.../pyt/test_010_client_untar.py | 87 ++ 41 files changed, 5265 insertions(+) create mode 100644 src/bin/pg_basebackup/pyt/test_010_pg_basebackup.py create mode 100644 src/bin/pg_basebackup/pyt/test_011_in_place_tablespace.py create mode 100644 src/bin/pg_basebackup/pyt/test_020_pg_receivewal.py create mode 100644 src/bin/pg_basebackup/pyt/test_030_pg_recvlogical.py create mode 100644 src/bin/pg_basebackup/pyt/test_040_pg_createsubscriber.py create mode 100644 src/bin/pg_combinebackup/pyt/test_001_basic.py create mode 100644 src/bin/pg_combinebackup/pyt/test_002_compare_backups.py create mode 100644 src/bin/pg_combinebackup/pyt/test_003_timeline.py create mode 100644 src/bin/pg_combinebackup/pyt/test_004_manifest.py create mode 100644 src/bin/pg_combinebackup/pyt/test_005_integrity.py create mode 100644 src/bin/pg_combinebackup/pyt/test_006_db_file_copy.py create mode 100644 src/bin/pg_combinebackup/pyt/test_007_wal_level_minimal.py create mode 100644 src/bin/pg_combinebackup/pyt/test_008_promote.py create mode 100644 src/bin/pg_combinebackup/pyt/test_009_no_full_file.py create mode 100644 src/bin/pg_combinebackup/pyt/test_010_hardlink.py create mode 100644 src/bin/pg_combinebackup/pyt/test_011_ib_truncation.py create mode 100644 src/bin/pg_rewind/pyt/test_001_basic.py create mode 100644 src/bin/pg_rewind/pyt/test_002_databases.py create mode 100644 src/bin/pg_rewind/pyt/test_003_extrafiles.py create mode 100644 src/bin/pg_rewind/pyt/test_004_pg_xlog_symlink.py create mode 100644 src/bin/pg_rewind/pyt/test_005_same_timeline.py create mode 100644 src/bin/pg_rewind/pyt/test_006_options.py create mode 100644 src/bin/pg_rewind/pyt/test_007_standby_source.py create mode 100644 src/bin/pg_rewind/pyt/test_008_min_recovery_point.py create mode 100644 src/bin/pg_rewind/pyt/test_009_growing_files.py create mode 100644 src/bin/pg_rewind/pyt/test_010_keep_recycled_wals.py create mode 100644 src/bin/pg_rewind/pyt/test_011_wal_copy.py create mode 100644 
src/bin/pg_verifybackup/pyt/test_001_basic.py create mode 100644 src/bin/pg_verifybackup/pyt/test_002_algorithm.py create mode 100644 src/bin/pg_verifybackup/pyt/test_003_corruption.py create mode 100644 src/bin/pg_verifybackup/pyt/test_004_options.py create mode 100644 src/bin/pg_verifybackup/pyt/test_005_bad_manifest.py create mode 100644 src/bin/pg_verifybackup/pyt/test_006_encoding.py create mode 100644 src/bin/pg_verifybackup/pyt/test_007_wal.py create mode 100644 src/bin/pg_verifybackup/pyt/test_008_untar.py create mode 100644 src/bin/pg_verifybackup/pyt/test_009_extract.py create mode 100644 src/bin/pg_verifybackup/pyt/test_010_client_untar.py diff --git a/src/bin/pg_basebackup/meson.build b/src/bin/pg_basebackup/meson.build index d70ce5786a261..657c9441c77fb 100644 --- a/src/bin/pg_basebackup/meson.build +++ b/src/bin/pg_basebackup/meson.build @@ -105,6 +105,19 @@ tests += { 't/040_pg_createsubscriber.pl', ], }, + 'pytest': { + 'env': {'GZIP_PROGRAM': gzip.found() ? gzip.full_path() : '', + 'TAR': tar.found() ? tar.full_path() : '', + 'LZ4': program_lz4.found() ? program_lz4.full_path() : '', + }, + 'tests': [ + 'pyt/test_010_pg_basebackup.py', + 'pyt/test_011_in_place_tablespace.py', + 'pyt/test_020_pg_receivewal.py', + 'pyt/test_030_pg_recvlogical.py', + 'pyt/test_040_pg_createsubscriber.py', + ], + }, } subdir('po', if_found: libintl) diff --git a/src/bin/pg_basebackup/pyt/test_010_pg_basebackup.py b/src/bin/pg_basebackup/pyt/test_010_pg_basebackup.py new file mode 100644 index 0000000000000..6878944fea37c --- /dev/null +++ b/src/bin/pg_basebackup/pyt/test_010_pg_basebackup.py @@ -0,0 +1,1164 @@ +# Copyright (c) 2021-2026, PostgreSQL Global Development Group + +# pylint: disable=line-too-long,too-many-lines +"""Port of src/bin/pg_basebackup/t/010_pg_basebackup.pl. 
+ +The broad pg_basebackup test: option sanity, client- and server-side +compression-spec failures, exclusion of non-copied files/forks/temp relations, +permission checks, separate WAL directory, tar format, tablespace mapping +(plain and tar restore), symlinks, recovery-conf generation, WAL fetch/stream +modes, backup targets, replication slots, checksum-mismatch reporting, gzip +compression, a background-stream-process termination test, an in-place +tablespace backup, and the different-system-identifier incremental failure. +""" + +import glob +import os +import re +import shutil +import subprocess +import tempfile +import threading +import time + +import pypg + +# Options shared by nearly all pg_basebackup invocations, mirroring the Perl +# @pg_basebackup_defs (keep test times reasonable). +_DEFS = ["pg_basebackup", "--no-sync", "-cfast"] + +# Files that should never be copied into a backup. +_DONOTCOPY_FILES = [ + "backup_label", + "tablespace_map", + "postgresql.auto.conf.tmp", + "current_logfiles.tmp", + "global/pg_internal.init.123", +] + +_TEMP_RELATION_FILES = ["t999_999", "t9999_999.1", "t999_9999_vm", "t99999_99999_vm.1"] + +_COMPRESSION_FAILURE_TESTS = [ + ( + "extrasquishy", + 'unrecognized compression algorithm: "extrasquishy"', + "failure on invalid compression algorithm", + ), + ( + "gzip:", + "invalid compression specification: found empty string where a compression option was expected", + "failure on empty compression options list", + ), + ( + "gzip:thunk", + 'invalid compression specification: unrecognized compression option: "thunk"', + "failure on unknown compression option", + ), + ( + "gzip:level", + 'invalid compression specification: compression option "level" requires a value', + "failure on missing compression level", + ), + ( + "gzip:level=", + 'invalid compression specification: value for compression option "level" must be an integer', + "failure on empty compression level", + ), + ( + "gzip:level=high", + 'invalid compression 
class _BgCommand:
    """A background command whose stderr is captured for pump_until.

    The Python analogue of the ``IPC::Run::start`` handle the Perl test keeps:
    stderr is read by a thread so the test can poll the accumulated stderr for
    a pattern (mirroring ``pump_until``), then ``finish`` waits for exit.
    """

    # Upper bound (seconds) on how long pump_until waits for the reader thread
    # to drain the pipe once the process has exited.  Bounded because another
    # process holding the same stderr pipe open would keep the reader blocked.
    _DRAIN_TIMEOUT = 1.0

    def __init__(self, cmd, env, timeout):
        """Start ``cmd`` with environment ``env`` and begin capturing stderr.

        ``timeout`` is the number of seconds each ``pump_until`` call is
        allowed to wait for its pattern.
        """
        self._timeout = timeout
        self._lock = threading.Lock()
        self._stderr = ""
        # pylint: disable=consider-using-with # long-lived; closed in finish()
        self._proc = subprocess.Popen(
            cmd,
            stdin=subprocess.DEVNULL,
            stdout=subprocess.DEVNULL,
            stderr=subprocess.PIPE,
            env=env,
            encoding="utf-8",
            errors="replace",
            bufsize=1,
        )
        self._thread = threading.Thread(target=self._reader, daemon=True)
        self._thread.start()

    def _reader(self):
        """Append each stderr line to the shared buffer until EOF."""
        assert self._proc.stderr is not None
        for line in iter(self._proc.stderr.readline, ""):
            with self._lock:
                self._stderr += line

    def pump_until(self, pattern):
        """Poll captured stderr until pattern matches; return True, else False.

        ``pattern`` is searched with ``re.DOTALL`` against everything captured
        so far.  Returns False when the process has exited without the pattern
        appearing, or when the timeout given to the constructor elapses.
        """
        regex = re.compile(pattern, re.DOTALL)
        deadline = time.monotonic() + self._timeout
        while True:
            with self._lock:
                if regex.search(self._stderr):
                    return True
            if self._proc.poll() is not None:
                # The process is gone, but its last lines may still sit unread
                # in the pipe.  Let the reader reach EOF before the final
                # check; otherwise a match in the final output is missed.
                remaining = deadline - time.monotonic()
                self._thread.join(max(0.0, min(self._DRAIN_TIMEOUT, remaining)))
                with self._lock:
                    return regex.search(self._stderr) is not None
            if time.monotonic() > deadline:
                return False
            time.sleep(0.05)

    def finish(self):
        """Wait for the process to exit, join the reader, and close the pipe."""
        self._proc.wait()
        self._thread.join()
        # The reader has hit EOF by now, so the pipe can be released safely.
        if self._proc.stderr is not None:
            self._proc.stderr.close()
+ """ + name = os.fsencode("{}/pgdata/".format(tempdir)) + b"FOO\xe0\xe0\xe0BAR" + with open(name, "ab") as fh: + fh.write(b"test backup of file with non-UTF8 name\n") + + +def _option_and_wal_config_failures(node, tempdir, pgdata): + """Option sanity, missing WAL config, and the no-clean directory behaviors.""" + node.command_fails( + ["pg_basebackup"], "pg_basebackup needs target directory specified" + ) + node.command_fails_like( + [ + "pg_basebackup", + "--pgdata", + "{}/backup".format(tempdir), + "--compress", + "none:1", + ], + r'compression algorithm "none" does not accept a compression level', + 'failure if method "none" specified with compression level', + ) + node.command_fails_like( + [ + "pg_basebackup", + "--pgdata", + "{}/backup".format(tempdir), + "--compress", + "none+", + ], + r'unrecognized compression algorithm: "none\+"', + "failure on incorrect separator to define compression level", + ) + _badchars_file(tempdir) + node.append_conf( + "\n# Allow replication (set up by the test)\n", filename="pg_hba.conf" + ) + node.reload() + node.command_fails( + _DEFS + ["--pgdata", "{}/backup".format(tempdir)], + "pg_basebackup fails because of WAL configuration", + ) + assert not os.path.isdir( + "{}/backup".format(tempdir) + ), "backup directory was cleaned up" + # A non-empty backup directory makes the next run fail but leaves it behind. 
def _write_donotcopy_files(pgdata):
    """Drop marker files into pgdata that a base backup must leave out.

    Every name in ``_DONOTCOPY_FILES`` and then ``.DS_Store`` gets the bytes
    ``DONOTCOPY`` appended, so later checks can confirm none of them were
    copied into the backup.
    """
    marker = b"DONOTCOPY"
    for name in [*_DONOTCOPY_FILES, ".DS_Store"]:
        with open(pgdata / name, "ab") as handle:
            handle.write(marker)
def _run_first_backup_and_check_exclusions(
    node, tempdir, pgdata, base_unlogged_path, postgres_oid
):
    """Take the first plain backup and check what it does and does not contain.

    Verifies the backup and its manifest exist, the permissions are 0700/0600,
    no WAL was copied, runtime directories are empty, do-not-copy and
    temp-relation files are absent, and only the init fork of the unlogged
    table was copied.  Removes the backup and the bogus backup_label after.
    """
    backup = "{}/backup".format(tempdir)
    node.command_ok(
        _DEFS + ["--pgdata", backup, "--wal-method", "none"],
        "pg_basebackup runs",
    )

    def in_backup(relpath):
        # Path of an entry inside the backup directory.
        return "{}/{}".format(backup, relpath)

    assert os.path.isfile(in_backup("PG_VERSION")), "backup was created"
    assert os.path.isfile(in_backup("backup_manifest")), "backup manifest included"
    assert pypg.check_mode_recursive(
        backup, 0o700, 0o600
    ), "check backup dir permissions"
    wal_entries = sorted(pypg.slurp_dir("{}/pg_wal/".format(backup)))
    assert wal_entries == sorted(
        ["archive_status", "summaries"]
    ), "no WAL files copied"

    empty_dirs = (
        "pg_dynshmem",
        "pg_notify",
        "pg_replslot",
        "pg_serial",
        "pg_snapshots",
        "pg_stat_tmp",
        "pg_subtrans",
    )
    for dirname in empty_dirs:
        assert (
            sorted(pypg.slurp_dir("{}/{}/".format(backup, dirname))) == []
        ), "contents of {}/ not copied".format(dirname)

    excluded_files = (
        "postgresql.auto.conf.tmp",
        "postmaster.opts",
        "postmaster.pid",
        "tablespace_map",
        "current_logfiles.tmp",
        "global/pg_internal.init",
        "global/pg_internal.init.123",
    )
    for filename in excluded_files:
        assert not os.path.isfile(in_backup(filename)), "{} not copied".format(
            filename
        )
    assert not os.path.isfile(in_backup(".DS_Store")), ".DS_Store not copied"

    assert os.path.isfile(
        "{}/{}_init".format(backup, base_unlogged_path)
    ), "unlogged init fork in backup"
    assert not os.path.isfile(
        in_backup(base_unlogged_path)
    ), "unlogged main fork not in backup"
    for filename in _TEMP_RELATION_FILES:
        temp_rel = "{}/base/{}/{}".format(backup, postgres_oid, filename)
        assert not os.path.isfile(temp_rel), "base/{}/{} not copied".format(
            postgres_oid, filename
        )
    label = pypg.slurp_file(in_backup("backup_label"))
    assert label != "DONOTCOPY", "existing backup_label not copied"
    shutil.rmtree(backup)
    # Delete the bogus backup_label so it does not interfere with startup.
    os.unlink(pgdata / "backup_label")
def _setup_symlinks(node, pgdata, tempdir):
    """Relocate pg_replslot behind a symlink and alias tempdir via a symlink.

    Stops the node, switches to group-readable permissions (umask 027, modes
    0750/0640 on pgdata), replaces ``pg_replslot`` with a symlink into a fresh
    system temp directory, and restarts the node.  Returns the tuple
    ``(sys_tempdir, real_sys_tempdir)`` where ``real_sys_tempdir`` is a
    symlink to ``tempdir`` located inside ``sys_tempdir``.
    """
    node.stop()
    os.umask(0o027)
    pypg.chmod_recursive(str(pgdata), 0o750, 0o640)

    sys_tempdir = tempfile.mkdtemp(prefix="pgbb_")
    slot_link = pgdata / "pg_replslot"
    slot_target = "{}/pg_replslot".format(sys_tempdir)
    # pg_replslot should be empty; recreate it under sys_tempdir before
    # symlinking to avoid moving things across drives.
    os.rmdir(slot_link)
    os.mkdir(slot_target)
    os.symlink(slot_target, slot_link)
    node.start()

    real_sys_tempdir = "{}/tempdir".format(sys_tempdir)
    os.symlink(tempdir, real_sys_tempdir)
    return sys_tempdir, real_sys_tempdir
def _tablespace_exclusion_checks(
    node, tempdir, postgres_oid, real_sys_tempdir, tblspc1_unlogged_path, tbl_spc1_id
):
    """Check unlogged/temp relation files are absent from the relocated tablespace.

    Only the init fork of the unlogged table may appear under
    ``tbackup/tblspc1``; the temp-looking relation files must not, and they
    are deleted from the live tablespace as they are checked.  Finally checks
    that ``pg_replslot`` was copied as a directory and removes ``backup1``.
    """
    # Keep only the last three path components of the relation file path.
    tail = re.search(r"[^/]*/[^/]*/[^/]*$", tblspc1_unlogged_path)
    assert tail is not None
    relocated = "{}/tbackup/tblspc1/{}".format(tempdir, tail.group(0))
    assert os.path.isfile(
        "{}_init".format(relocated)
    ), "unlogged init fork in tablespace backup"
    assert not os.path.isfile(
        relocated
    ), "unlogged main fork not in tablespace backup"

    for filename in ("t888_888", "t888888_888888_vm.1"):
        rel_suffix = "{}/{}/{}".format(tbl_spc1_id, postgres_oid, filename)
        assert not os.path.isfile(
            "{}/tbackup/tblspc1/{}".format(tempdir, rel_suffix)
        ), "[tblspc1]/{}/{} not copied".format(postgres_oid, filename)
        # Remove temp relation files or tablespace drop will fail.
        os.unlink("{}/tblspc1/{}".format(real_sys_tempdir, rel_suffix))

    first_backup = "{}/backup1".format(tempdir)
    assert os.path.isdir(
        "{}/pg_replslot".format(first_backup)
    ), "pg_replslot symlink copied as directory"
    shutil.rmtree(first_backup)
def _recovery_conf_and_xlog_modes(node, tempdir):
    """Exercise --write-recovery-conf and each way of collecting WAL.

    Covers the generated ``postgresql.auto.conf``/``standby.signal``, the
    default, fetch and stream WAL methods in plain format, stream in tar
    format, stream with ``--no-slot``, and a final fetch backup whose
    directory (``backupxf``) is left in place.
    """

    def target(name):
        # Backup directory of the given name under tempdir.
        return "{}/{}".format(tempdir, name)

    recovery_dir = target("backupR")
    node.command_ok(
        _DEFS + ["--pgdata", recovery_dir, "--write-recovery-conf"],
        "pg_basebackup --write-recovery-conf runs",
    )
    auto_conf = recovery_dir + "/postgresql.auto.conf"
    assert os.path.isfile(auto_conf), "postgresql.auto.conf exists"
    assert os.path.isfile(
        recovery_dir + "/standby.signal"
    ), "standby.signal was created"
    recovery_conf = pypg.slurp_file(auto_conf)
    shutil.rmtree(recovery_dir)
    assert re.search(
        r"(?m)^primary_conninfo = '.*port={}.*'\n".format(node.port), recovery_conf
    ), "postgresql.auto.conf sets primary_conninfo"

    wal_segment = re.compile(r"^[0-9A-F]{24}$")
    plain_modes = (
        ("backupxd", [], "pg_basebackup runs in default xlog mode"),
        (
            "backupxf",
            ["--wal-method", "fetch"],
            "pg_basebackup --wal-method fetch runs",
        ),
        (
            "backupxs",
            ["--wal-method", "stream"],
            "pg_basebackup --wal-method stream runs",
        ),
    )
    for name, extra_args, description in plain_modes:
        backup_dir = target(name)
        node.command_ok(_DEFS + ["--pgdata", backup_dir] + extra_args, description)
        wal_entries = pypg.slurp_dir(backup_dir + "/pg_wal")
        assert any(
            wal_segment.match(entry) for entry in wal_entries
        ), "WAL files copied"
        shutil.rmtree(backup_dir)

    tar_stream_dir = target("backupxst")
    node.command_ok(
        _DEFS
        + ["--pgdata", tar_stream_dir, "--wal-method", "stream", "--format", "tar"],
        "pg_basebackup --wal-method stream runs in tar mode",
    )
    assert os.path.isfile(tar_stream_dir + "/pg_wal.tar"), "tar file was created"
    shutil.rmtree(tar_stream_dir)

    noslot_dir = target("backupnoslot")
    node.command_ok(
        _DEFS + ["--pgdata", noslot_dir, "--wal-method", "stream", "--no-slot"],
        "pg_basebackup --wal-method stream runs with --no-slot",
    )
    shutil.rmtree(noslot_dir)

    node.command_ok(
        _DEFS + ["--pgdata", target("backupxf"), "--wal-method", "fetch"],
        "pg_basebackup --wal-method fetch runs",
    )
def _slot_tests(node, tempdir):
    """Check replication-slot option handling and slot creation.

    Runs the failing option combinations first, then creates ``slot0`` with
    ``--create-slot`` and confirms it exists with a non-empty restart_lsn,
    checks that creating it again fails, and hands over to ``_slot1_tests``.
    """

    def into(name, *extra):
        # Default options plus --pgdata <tempdir>/<name> and any extra flags.
        return _DEFS + ["--pgdata", "{}/{}".format(tempdir, name)] + list(extra)

    def slot0_column(column):
        # Value of one pg_replication_slots column for slot0.
        return node.safe_psql(
            "SELECT {} FROM pg_replication_slots WHERE slot_name = 'slot0'".format(
                column
            )
        )

    node.command_fails_like(
        into("backupxs_sl_fail", "--wal-method", "stream", "--slot", "slot0"),
        r'replication slot "slot0" does not exist',
        "pg_basebackup fails with nonexistent replication slot",
    )
    node.command_fails_like(
        into("backupxs_slot", "--create-slot"),
        r"--create-slot needs a slot to be specified using --slot",
        "pg_basebackup --create-slot fails without slot name",
    )
    node.command_fails_like(
        into("backupxs_slot", "--create-slot", "--slot", "slot0", "--no-slot"),
        r"--no-slot cannot be used with slot name",
        "pg_basebackup fails with --create-slot --slot --no-slot",
    )
    node.command_fails_like(
        _DEFS + ["--target", "blackhole", "--pgdata", "{}/blackhole".format(tempdir)],
        r"cannot specify both output directory and backup target",
        "backup target and output directory",
    )
    node.command_ok(
        into("backuptr/co", "--wal-method", "none"),
        "pg_basebackup --wal-method fetch runs",
    )

    node.command_ok(
        into("backupxs_slot", "--create-slot", "--slot", "slot0"),
        "pg_basebackup --create-slot runs",
    )
    shutil.rmtree("{}/backupxs_slot".format(tempdir))
    assert slot0_column("slot_name") == "slot0", "replication slot was created"
    assert slot0_column("restart_lsn") != "", "restart LSN of new slot is not null"

    node.command_fails_like(
        into("backupxs_slot1", "--create-slot", "--slot", "slot0"),
        r'replication slot "slot0" already exists',
        "pg_basebackup fails with --create-slot --slot and a previously existing slot",
    )
    _slot1_tests(node, tempdir)
r"(?m)^primary_slot_name = 'slot1'\n", + pypg.slurp_file("{}/backupxs_sl_R/postgresql.auto.conf".format(tempdir)), + ), "recovery conf file sets primary_slot_name" + assert node.safe_psql("SHOW data_checksums;") == "on", "checksums are enabled" + shutil.rmtree("{}/backupxs_sl_R".format(tempdir)) + + +def _dbname_recovery_conf(node, tempdir): + """--dbname is written into the generated recovery conf.""" + node.command_ok( + _DEFS + + [ + "--pgdata", + "{}/backup_dbname_R".format(tempdir), + "--wal-method", + "stream", + "--dbname", + "dbname=db1", + "--write-recovery-conf", + ], + "pg_basebackup with dbname and --write-recovery-conf runs", + ) + assert re.search( + r"(?m)dbname=db1", + pypg.slurp_file("{}/backup_dbname_R/postgresql.auto.conf".format(tempdir)), + ), "recovery conf file sets dbname" + shutil.rmtree("{}/backup_dbname_R".format(tempdir)) + + +def _checksum_corruption_tests(node, tempdir): + """Checksum-mismatch reporting: 1, capped at 5, total count, and -k bypass.""" + file_corrupt1 = node.safe_psql( + "CREATE TABLE corrupt1 AS SELECT a FROM generate_series(1,10000) AS a; " + "ALTER TABLE corrupt1 SET (autovacuum_enabled=false); " + "SELECT pg_relation_filepath('corrupt1')" + ) + file_corrupt2 = node.safe_psql( + "CREATE TABLE corrupt2 AS SELECT b FROM generate_series(1,2) AS b; " + "ALTER TABLE corrupt2 SET (autovacuum_enabled=false); " + "SELECT pg_relation_filepath('corrupt2')" + ) + block_size = int(node.safe_psql("SHOW block_size;")) + node.stop() + node.corrupt_page_checksum(file_corrupt1, 0) + node.start() + node.command_checks_all( + _DEFS + ["--pgdata", "{}/backup_corrupt".format(tempdir)], + 1, + [r"^$"], + [r"(?s)^WARNING.*checksum verification failed"], + "pg_basebackup reports checksum mismatch", + ) + shutil.rmtree("{}/backup_corrupt".format(tempdir)) + node.stop() + for i in range(1, 6): + node.corrupt_page_checksum(file_corrupt1, i * block_size) + node.start() + node.command_checks_all( + _DEFS + ["--pgdata", 
"{}/backup_corrupt2".format(tempdir)], + 1, + [r"^$"], + [r"(?s)^WARNING.*further.*failures.*will.not.be.reported"], + "pg_basebackup does not report more than 5 checksum mismatches", + ) + shutil.rmtree("{}/backup_corrupt2".format(tempdir)) + node.stop() + node.corrupt_page_checksum(file_corrupt2, 0) + node.start() + node.command_checks_all( + _DEFS + ["--pgdata", "{}/backup_corrupt3".format(tempdir)], + 1, + [r"^$"], + [r"(?s)^WARNING.*7 total checksum verification failures"], + "pg_basebackup correctly report the total number of checksum mismatches", + ) + shutil.rmtree("{}/backup_corrupt3".format(tempdir)) + node.command_ok( + _DEFS + + ["--pgdata", "{}/backup_corrupt4".format(tempdir), "--no-verify-checksums"], + "pg_basebackup with -k does not report checksum mismatch", + ) + shutil.rmtree("{}/backup_corrupt4".format(tempdir)) + node.safe_psql("DROP TABLE corrupt1;") + node.safe_psql("DROP TABLE corrupt2;") + + +def _compression_methods(node, tempdir): + """ZLIB compression: --compress, --gzip, gzip:1, file naming, integrity.""" + if not pypg.check_pg_config(r"#define HAVE_LIBZ 1"): + return + node.command_ok( + _DEFS + + [ + "--pgdata", + "{}/backup_gzip".format(tempdir), + "--compress", + "1", + "--format", + "t", + ], + "pg_basebackup with --compress", + ) + node.command_ok( + _DEFS + + ["--pgdata", "{}/backup_gzip2".format(tempdir), "--gzip", "--format", "t"], + "pg_basebackup with --gzip", + ) + node.command_ok( + _DEFS + + [ + "--pgdata", + "{}/backup_gzip3".format(tempdir), + "--compress", + "gzip:1", + "--format", + "t", + ], + "pg_basebackup with --compress=gzip:1", + ) + zlib_files = glob.glob("{}/backup_gzip/*.tar.gz".format(tempdir)) + assert ( + len(zlib_files) == 2 + ), "two files created with --compress=NUM (base.tar.gz and pg_wal.tar.gz)" + zlib_files2 = glob.glob("{}/backup_gzip2/*.tar.gz".format(tempdir)) + assert ( + len(zlib_files2) == 2 + ), "two files created with --gzip (base.tar.gz and pg_wal.tar.gz)" + zlib_files3 = 
glob.glob("{}/backup_gzip3/*.tar.gz".format(tempdir)) + assert ( + len(zlib_files3) == 2 + ), "two files created with --compress=gzip:NUM (base.tar.gz and pg_wal.tar.gz)" + gzip = os.environ.get("GZIP_PROGRAM") + if gzip: + result = node.bin.run_command( + [gzip, "--test"] + zlib_files + zlib_files2 + zlib_files3 + ) + assert result.rc == 0, "gzip verified the integrity of compressed data" + shutil.rmtree("{}/backup_gzip".format(tempdir)) + shutil.rmtree("{}/backup_gzip2".format(tempdir)) + shutil.rmtree("{}/backup_gzip3".format(tempdir)) + + +def _sigchld_test(node, tempdir): + """A killed background stream process makes pg_basebackup exit with an error.""" + node.safe_psql("CREATE TABLE t AS SELECT a FROM generate_series(1,10000) AS a;") + timeout = pypg.test_timeout_default() + # pg_basebackup uses PGAPPNAME as its fallback application_name, which the + # walsender then reports in pg_stat_activity (mirrors the Perl harness + # setting PGAPPNAME to the test file's basename). + appname = "test_010_pg_basebackup.py" + cmd = [ + str(node.bin_dir / "pg_basebackup"), + "--no-sync", + "-cfast", + "--wal-method=stream", + "--pgdata", + "{}/sigchld".format(tempdir), + "--max-rate", + "32", + "--dbname", + node.connstr("postgres"), + ] + env = dict(node.connenv) + env["PGAPPNAME"] = appname + bg = _BgCommand(cmd, env, timeout) + try: + assert node.poll_query_until( + "SELECT pg_terminate_backend(pid) FROM pg_stat_activity WHERE " + "application_name = '{}' AND wait_event = 'WalSenderMain' " + "AND backend_type = 'walsender' AND query ~ 'START_REPLICATION'".format( + appname + ) + ), "Walsender killed" + assert bg.pump_until( + r"background process terminated unexpectedly" + ), "background process exit message" + finally: + bg.finish() + + +def _in_place_tablespace_backup(node): + """Back up a cluster containing an in-place tablespace.""" + node.safe_psql( + "SET allow_in_place_tablespaces = on; CREATE TABLESPACE tblspc2 LOCATION '';" + ) + node.safe_psql( + "CREATE TABLE 
test2 (a int) TABLESPACE tblspc2;INSERT INTO test2 VALUES (1234);" + ) + tblspc_oid = node.safe_psql( + "SELECT oid FROM pg_tablespace WHERE spcname = 'tblspc2';" + ) + node.backup("backup3") + node.safe_psql("DROP TABLE test2;") + node.safe_psql("DROP TABLESPACE tblspc2;") + backupdir = "{}/backup3".format(node.backup_dir) + dst_tblspc = glob.glob("{}/pg_tblspc/{}/PG_*".format(backupdir, tblspc_oid)) + assert len(dst_tblspc) == 1, "tblspc directory copied" + return backupdir + + +def _different_sysid_manifest(node, create_pg, tempdir, backupdir): + """Incremental backup against a manifest from a different cluster fails.""" + node2 = create_pg( + "node2", + force_initdb=True, + has_archiving=True, + allows_streaming=True, + start=False, + ) + node2.append_conf("summarize_wal = on") + node2.start() + node2.command_fails_like( + _DEFS + + [ + "--pgdata", + "{}/diff_sysid".format(tempdir), + "--incremental", + "{}/backup_manifest".format(backupdir), + ], + r"system identifier in backup manifest is .*, but database system identifier is", + "pg_basebackup fails with different database system manifest", + ) + + +def test_010_pg_basebackup(create_pg, pg_bin, tmp_path): + """End-to-end pg_basebackup coverage mirroring 010_pg_basebackup.pl.""" + pg_bin.program_help_ok("pg_basebackup") + pg_bin.program_version_ok("pg_basebackup") + pg_bin.program_options_handling_ok("pg_basebackup") + os.umask(0o077) + tempdir = str(tmp_path / "tempdir") + os.mkdir(tempdir) + os.mkdir("{}/pgdata".format(tempdir)) + node = create_pg( + "main", + extra=["--data-checksums"], + auth_extra=["--create-role", "backupuser"], + start=False, + ) + # Mirror Cluster->init without allows_streaming: a non-streaming primary is + # configured with minimal WAL so the first pg_basebackup attempt fails for + # WAL-configuration reasons (the test enables replication later). 
+ node.append_conf("wal_level = minimal\nmax_wal_senders = 0\n") + node.start() + pgdata = node.datadir + + _option_and_wal_config_failures(node, tempdir, pgdata) + _compression_failure_tests(node, tempdir) + _write_donotcopy_files(pgdata) + base_unlogged_path, postgres_oid = _setup_relations_for_exclusion(node, pgdata) + _run_first_backup_and_check_exclusions( + node, tempdir, pgdata, base_unlogged_path, postgres_oid + ) + _waldir_and_tar_format(node, tempdir) + _long_name_tar(node, tempdir, pgdata) + + _sys_tempdir, real_sys_tempdir = _setup_symlinks(node, pgdata, tempdir) + _tablespace_tar_backup(node, create_pg, tempdir, real_sys_tempdir) + real_ts_dir = "{}/tblspc1".format(real_sys_tempdir) + tblspc1_unlogged_path, tbl_spc1_id = _tablespace_unlogged_temp_setup( + node, pgdata, real_sys_tempdir, postgres_oid + ) + _tablespace_plain_backup(node, tempdir, pgdata, real_ts_dir) + _tablespace_exclusion_checks( + node, + tempdir, + postgres_oid, + real_sys_tempdir, + tblspc1_unlogged_path, + tbl_spc1_id, + ) + _tablespace_equals_and_longname(node, tempdir, real_sys_tempdir) + + _recovery_conf_and_xlog_modes(node, tempdir) + _backup_target_tests(node, tempdir) + _slot_tests(node, tempdir) + _dbname_recovery_conf(node, tempdir) + _checksum_corruption_tests(node, tempdir) + _compression_methods(node, tempdir) + _sigchld_test(node, tempdir) + backupdir = _in_place_tablespace_backup(node) + _different_sysid_manifest(node, create_pg, tempdir, backupdir) diff --git a/src/bin/pg_basebackup/pyt/test_011_in_place_tablespace.py b/src/bin/pg_basebackup/pyt/test_011_in_place_tablespace.py new file mode 100644 index 0000000000000..7c664419935e4 --- /dev/null +++ b/src/bin/pg_basebackup/pyt/test_011_in_place_tablespace.py @@ -0,0 +1,40 @@ +# Copyright (c) 2020-2026, PostgreSQL Global Development Group + +"""Port of src/bin/pg_basebackup/t/011_in_place_tablespace.pl. 
+ +A tar-format base backup of a cluster with an in-place tablespace produces both +the main base.tar and exactly one numbered tablespace tar. +""" + +import glob +import os +import tempfile + + +def test_011_in_place_tablespace(create_pg): + """tar-format backup emits base.tar plus one tablespace tar.""" + tempdir = tempfile.mkdtemp(prefix="ipts_") + node = create_pg("main", allows_streaming=True) + node.safe_psql( + "SET allow_in_place_tablespaces = on;\n" + "CREATE TABLESPACE inplace LOCATION '';" + ) + backupdir = tempdir + "/backup" + node.command_ok( + [ + "pg_basebackup", + "--no-sync", + "--checkpoint", + "fast", + "--pgdata", + backupdir, + "--format", + "tar", + "--wal-method", + "none", + ], + "pg_basebackup runs", + ) + assert os.path.isfile("{}/base.tar".format(backupdir)), "backup tar was created" + tblspc_tars = glob.glob("{}/[0-9]*.tar".format(backupdir)) + assert len(tblspc_tars) == 1, "one tablespace tar was created" diff --git a/src/bin/pg_basebackup/pyt/test_020_pg_receivewal.py b/src/bin/pg_basebackup/pyt/test_020_pg_receivewal.py new file mode 100644 index 0000000000000..f1528b8b39d28 --- /dev/null +++ b/src/bin/pg_basebackup/pyt/test_020_pg_receivewal.py @@ -0,0 +1,289 @@ +# Copyright (c) 2017-2026, PostgreSQL Global Development Group + +# pylint: disable=line-too-long,too-many-statements +"""Port of src/bin/pg_basebackup/t/020_pg_receivewal.pl. + +pg_receivewal option/usage handling, slot create/drop, synchronous streaming to +.partial then completed segments, optional gzip/lz4/zstd compression (skipped +when the build lacks them), streaming from a slot's restart_lsn, stream-dir +permissions, and resuming across a standby promotion / timeline jump. 
+""" + +import glob +import os +import re + +import pypg + + +def _glob1(pattern, msg): + matches = glob.glob(pattern) + assert len(matches) == 1, msg + return matches[0] + + +def test_020_pg_receivewal(create_pg, pg_bin): + """pg_receivewal usage, slots, compression, restart_lsn, and timeline jump.""" + os.umask(0o077) + pg_bin.program_help_ok("pg_receivewal") + pg_bin.program_version_ok("pg_receivewal") + pg_bin.program_options_handling_ok("pg_receivewal") + primary = create_pg( + "primary", allows_streaming=True, extra=["--wal-segsize=1"], start=False + ) + primary.start() + stream_dir = "{}/archive_wal".format(primary.basedir) + os.mkdir(stream_dir) + primary.command_fails( + ["pg_receivewal"], "pg_receivewal needs target directory specified" + ) + primary.command_fails( + ["pg_receivewal", "--directory", stream_dir, "--create-slot", "--drop-slot"], + "failure if both --create-slot and --drop-slot specified", + ) + primary.command_fails( + ["pg_receivewal", "--directory", stream_dir, "--create-slot"], + "failure if --create-slot specified without --slot", + ) + primary.command_fails( + ["pg_receivewal", "--directory", stream_dir, "--synchronous", "--no-sync"], + "failure if --synchronous specified with --no-sync", + ) + primary.command_fails_like( + ["pg_receivewal", "--directory", stream_dir, "--compress", "none:1"], + r'pg_receivewal: error: invalid compression specification: compression algorithm "none" does not accept a compression level', + "failure if --compress none:N (where N > 0)", + ) + slot_name = "test" + primary.command_ok( + ["pg_receivewal", "--slot", slot_name, "--create-slot"], + "creating a replication slot", + ) + slot = primary.slot(slot_name) + assert slot["slot_type"] == "physical", "physical replication slot was created" + assert slot["restart_lsn"] == "", "restart LSN of new slot is null" + primary.command_ok( + ["pg_receivewal", "--slot", slot_name, "--drop-slot"], + "dropping a replication slot", + ) + assert 
primary.slot(slot_name)["slot_type"] == "", "replication slot was removed" + primary.psql_capture("CREATE TABLE test_table(x integer PRIMARY KEY);") + primary.psql_capture("SELECT pg_switch_wal();") + nextlsn = primary.safe_psql("SELECT pg_current_wal_insert_lsn();") + primary.psql_capture("INSERT INTO test_table VALUES (1);") + primary.command_ok( + [ + "pg_receivewal", + "--directory", + stream_dir, + "--verbose", + "--endpos", + nextlsn, + "--synchronous", + "--no-loop", + ], + "streaming some WAL with --synchronous", + ) + partial_wal = _glob1( + "{}/*.partial".format(stream_dir), "one partial WAL segment was created" + ) + partial_wal = _compression_blocks(primary, stream_dir, partial_wal) + primary.psql_capture("SELECT pg_switch_wal();") + nextlsn = primary.safe_psql("SELECT pg_current_wal_insert_lsn();") + primary.psql_capture("INSERT INTO test_table VALUES (4);") + primary.command_ok( + [ + "pg_receivewal", + "--directory", + stream_dir, + "--verbose", + "--endpos", + nextlsn, + "--no-loop", + ], + "streaming some WAL", + ) + completed = re.sub(r"(\.gz|\.lz4)?\.partial", "", partial_wal) + assert os.path.exists( + completed + ), "check that previously partial WAL is now complete" + assert pypg.check_mode_recursive( + stream_dir, 0o700, 0o600 + ), "check stream dir permissions" + _slot_restart_lsn_streaming(primary) + _timeline_jump(create_pg, primary) + + +def _compression_blocks(primary, stream_dir, partial_wal): + """Run the gzip/lz4/zstd streaming sub-tests the build supports.""" + if pypg.check_pg_config(r"#define HAVE_LIBZ 1"): + primary.psql_capture("SELECT pg_switch_wal();") + nextlsn = primary.safe_psql("SELECT pg_current_wal_insert_lsn();") + primary.psql_capture("INSERT INTO test_table VALUES (2);") + primary.command_ok( + [ + "pg_receivewal", + "--directory", + stream_dir, + "--verbose", + "--endpos", + nextlsn, + "--compress", + "gzip:1", + "--no-loop", + ], + "streaming some WAL using ZLIB compression", + ) + zlib_wal = _glob1( + 
"{}/*.gz".format(stream_dir), + "one WAL segment compressed with ZLIB was created", + ) + partial_wal = _glob1( + "{}/*.gz.partial".format(stream_dir), + "one partial WAL segment compressed with ZLIB was created", + ) + gzip = os.environ.get("GZIP_PROGRAM") + if gzip: + assert ( + primary.bin.run_command([gzip, "--test", zlib_wal]).rc == 0 + ), "gzip verified the integrity of compressed WAL segments" + if pypg.check_pg_config(r"#define USE_LZ4 1"): + primary.psql_capture("SELECT pg_switch_wal();") + nextlsn = primary.safe_psql("SELECT pg_current_wal_insert_lsn();") + primary.psql_capture("INSERT INTO test_table VALUES (3);") + primary.command_ok( + [ + "pg_receivewal", + "--directory", + stream_dir, + "--verbose", + "--endpos", + nextlsn, + "--no-loop", + "--compress", + "lz4", + ], + "streaming some WAL using --compress=lz4", + ) + lz4_wal = _glob1( + "{}/*.lz4".format(stream_dir), + "one WAL segment compressed with LZ4 was created", + ) + partial_wal = _glob1( + "{}/*.lz4.partial".format(stream_dir), + "one partial WAL segment compressed with LZ4 was created", + ) + lz4 = os.environ.get("LZ4") + if lz4: + assert ( + primary.bin.run_command([lz4, "-t", lz4_wal]).rc == 0 + ), "lz4 verified the integrity of compressed WAL segments" + return partial_wal + + +def _slot_restart_lsn_streaming(primary): + """Stream WAL starting from a slot's restart_lsn into a dedicated dir.""" + slot_dir = "{}/slot_wal".format(primary.basedir) + os.mkdir(slot_dir) + slot_name = "archive_slot" + primary.psql_capture("checkpoint;") + primary.psql_capture( + "SELECT pg_create_physical_replication_slot('{}', true);".format(slot_name) + ) + walfile_streamed = primary.safe_psql( + "SELECT pg_walfile_name(restart_lsn) FROM pg_replication_slots " + "WHERE slot_name = '{}';".format(slot_name) + ) + primary.psql_capture("INSERT INTO test_table VALUES (5);") + primary.psql_capture("SELECT pg_switch_wal();") + nextlsn = primary.safe_psql("SELECT pg_current_wal_insert_lsn();") + 
primary.psql_capture("INSERT INTO test_table VALUES (6);") + primary.command_fails_like( + [ + "pg_receivewal", + "--directory", + slot_dir, + "--slot", + "nonexistentslot", + "--no-loop", + "--no-sync", + "--verbose", + "--endpos", + nextlsn, + ], + r'pg_receivewal: error: replication slot "nonexistentslot" does not exist', + "pg_receivewal fails with non-existing slot", + ) + primary.command_ok( + [ + "pg_receivewal", + "--directory", + slot_dir, + "--slot", + slot_name, + "--no-loop", + "--no-sync", + "--verbose", + "--endpos", + nextlsn, + ], + "WAL streamed from the slot's restart_lsn", + ) + assert os.path.exists( + "{}/{}".format(slot_dir, walfile_streamed) + ), "WAL from the slot's restart_lsn has been archived" + + +def _timeline_jump(create_pg, primary): + """After a standby promotion, resume streaming across the timeline jump.""" + backup_name = "basebackup" + primary.backup(backup_name) + standby = create_pg( + "standby", from_backup=(primary, backup_name), has_streaming=True, start=False + ) + standby.start() + archive_slot = "archive_slot" + standby.psql_capture( + "CREATE_REPLICATION_SLOT {} PHYSICAL (RESERVE_WAL)".format(archive_slot), + dbname="", + replication="1", + ) + primary.wait_for_catchup(standby) + replication_slot_lsn = standby.slot(archive_slot)["restart_lsn"] + walfile_before = primary.safe_psql( + "SELECT pg_walfile_name('{}');".format(replication_slot_lsn) + ) + standby.promote() + walfile_after = standby.safe_psql( + "SELECT pg_walfile_name(pg_current_wal_insert_lsn());" + ) + standby.psql_capture("INSERT INTO test_table VALUES (7);") + standby.psql_capture("SELECT pg_switch_wal();") + nextlsn = standby.safe_psql("SELECT pg_current_wal_insert_lsn();") + standby.psql_capture("INSERT INTO test_table VALUES (8);") + timeline_dir = "{}/timeline_wal".format(primary.basedir) + os.mkdir(timeline_dir) + standby.command_ok( + [ + "pg_receivewal", + "--directory", + timeline_dir, + "--verbose", + "--endpos", + nextlsn, + "--slot", + 
archive_slot, + "--no-sync", + "--no-loop", + ], + "Stream some wal after promoting, resuming from the slot's position", + ) + assert os.path.exists( + "{}/{}".format(timeline_dir, walfile_before) + ), "WAL segment {} archived after timeline jump".format(walfile_before) + assert os.path.exists( + "{}/{}".format(timeline_dir, walfile_after) + ), "WAL segment {} archived after timeline jump".format(walfile_after) + assert os.path.exists( + "{}/00000002.history".format(timeline_dir) + ), "timeline history file archived after timeline jump" diff --git a/src/bin/pg_basebackup/pyt/test_030_pg_recvlogical.py b/src/bin/pg_basebackup/pyt/test_030_pg_recvlogical.py new file mode 100644 index 0000000000000..f9d39fdddac92 --- /dev/null +++ b/src/bin/pg_basebackup/pyt/test_030_pg_recvlogical.py @@ -0,0 +1,202 @@ +# Copyright (c) 2017-2026, PostgreSQL Global Development Group + +"""Port of src/bin/pg_basebackup/t/030_pg_recvlogical.pl. + +pg_recvlogical option/usage handling and behavior: required slot/database/ +action, slot create/start/drop, two-phase and failover slot creation, and a +reconnection test where the streaming consumer is terminated server-side and +resumes, ultimately writing both committed INSERTs to its output file. 
+""" + +import signal + +import pypg + + +def _cs(node): + return node.connstr("postgres") + + +def test_030_pg_recvlogical(create_pg, pg_bin): + """pg_recvlogical usage paths, slot lifecycle, two-phase/failover, reconnect.""" + pg_bin.program_help_ok("pg_recvlogical") + pg_bin.program_version_ok("pg_recvlogical") + pg_bin.program_options_handling_ok("pg_recvlogical") + node = create_pg("main", allows_streaming=True, has_archiving=True, start=False) + node.append_conf( + "\nwal_level = 'logical'\nmax_replication_slots = 4\nmax_wal_senders = 4\n" + "log_min_messages = 'debug1'\nlog_error_verbosity = verbose\n" + "max_prepared_transactions = 10\n" + ) + node.start() + node.command_fails(["pg_recvlogical"], "pg_recvlogical needs a slot name") + node.command_fails( + ["pg_recvlogical", "--slot", "test"], "pg_recvlogical needs a database" + ) + node.command_fails( + ["pg_recvlogical", "--slot", "test", "--dbname", "postgres"], + "pg_recvlogical needs an action", + ) + node.command_fails( + ["pg_recvlogical", "--slot", "test", "--dbname", _cs(node), "--start"], + "no destination file", + ) + node.command_ok( + ["pg_recvlogical", "--slot", "test", "--dbname", _cs(node), "--create-slot"], + "slot created", + ) + assert node.slot("test")["restart_lsn"] != "", "restart lsn is defined for new slot" + node.psql_capture("CREATE TABLE test_table(x integer)") + node.psql_capture( + "INSERT INTO test_table(x) SELECT y FROM generate_series(1, 10) a(y);" + ) + nextlsn = node.safe_psql("SELECT pg_current_wal_insert_lsn()") + node.command_ok( + [ + "pg_recvlogical", + "--slot", + "test", + "--dbname", + _cs(node), + "--start", + "--endpos", + nextlsn, + "--no-loop", + "--file", + "-", + ], + "replayed a transaction", + ) + node.command_ok( + ["pg_recvlogical", "--slot", "test", "--dbname", _cs(node), "--drop-slot"], + "slot dropped", + ) + node.command_ok( + [ + "pg_recvlogical", + "--slot", + "test", + "--dbname", + _cs(node), + "--create-slot", + "--two-phase", + ], + "slot with 
two-phase created", + ) + assert node.slot("test")["restart_lsn"] != "", "restart lsn is defined for new slot" + node.safe_psql( + "BEGIN; INSERT INTO test_table values (11); PREPARE TRANSACTION 'test'" + ) + node.safe_psql("COMMIT PREPARED 'test'") + nextlsn = node.safe_psql("SELECT pg_current_wal_insert_lsn()") + node.command_fails( + [ + "pg_recvlogical", + "--slot", + "test", + "--dbname", + _cs(node), + "--start", + "--endpos", + nextlsn, + "--enable-two-phase", + "--no-loop", + "--file", + "-", + ], + "incorrect usage", + ) + node.command_ok( + [ + "pg_recvlogical", + "--slot", + "test", + "--dbname", + _cs(node), + "--start", + "--endpos", + nextlsn, + "--no-loop", + "--file", + "-", + ], + "replayed a two-phase transaction", + ) + node.command_ok( + ["pg_recvlogical", "--slot", "test", "--drop-slot"], + "drop could work without dbname", + ) + node.command_ok( + [ + "pg_recvlogical", + "--slot", + "test", + "--dbname", + _cs(node), + "--create-slot", + "--enable-failover", + ], + "slot with failover created", + ) + assert ( + node.safe_psql( + "SELECT failover FROM pg_catalog.pg_replication_slots " + "WHERE slot_name = 'test'" + ) + == "t" + ), "failover is enabled for the new slot" + _reconnect_test(node) + + +def _reconnect_test(node): + """A terminated streaming consumer reconnects and writes both INSERTs.""" + outfile = "{}/reconnect.out".format(node.basedir) + node.command_ok( + [ + "pg_recvlogical", + "--slot", + "reconnect_test", + "--dbname", + _cs(node), + "--create-slot", + ], + "slot created for reconnection test", + ) + node.safe_psql("INSERT INTO test_table VALUES (1)") + cmd = [ + "pg_recvlogical", + "--slot", + "reconnect_test", + "--dbname", + _cs(node), + "--start", + "--file", + outfile, + "--fsync-interval", + "1", + "--status-interval", + "100", + "--verbose", + ] + recv = node.bin.popen(cmd) + try: + first_ins = pypg.wait_for_file(outfile, r"INSERT") + backend_pid = node.safe_psql( + "SELECT active_pid FROM pg_replication_slots " + 
"WHERE slot_name = 'reconnect_test'" + ) + node.safe_psql("SELECT pg_terminate_backend({})".format(backend_pid)) + assert node.poll_query_until( + "SELECT active_pid IS NOT NULL AND active_pid != {} " + "FROM pg_replication_slots WHERE slot_name = 'reconnect_test'".format( + backend_pid + ) + ), "Timed out while waiting for pg_recvlogical to reconnect" + node.safe_psql("INSERT INTO test_table VALUES (2)") + pypg.wait_for_file(outfile, r"INSERT", first_ins) + finally: + recv.send_signal(signal.SIGTERM) + recv.wait() + outfiledata = pypg.slurp_file(outfile) + count = outfiledata.count("INSERT") + assert count == 2, "pg_recvlogical has received and written two INSERTs" diff --git a/src/bin/pg_basebackup/pyt/test_040_pg_createsubscriber.py b/src/bin/pg_basebackup/pyt/test_040_pg_createsubscriber.py new file mode 100644 index 0000000000000..4591678c6792d --- /dev/null +++ b/src/bin/pg_basebackup/pyt/test_040_pg_createsubscriber.py @@ -0,0 +1,750 @@ +# Copyright (c) 2024-2026, PostgreSQL Global Development Group + +# pylint: disable=line-too-long +"""Port of src/bin/pg_basebackup/t/040_pg_createsubscriber.pl. + +Use a standby server as the subscriber: build a publisher (node P) and a +streaming standby (node S), exercise pg_createsubscriber's option validation +and unmet-condition checks, then convert node S into a logical subscriber and +verify replication, --clean, --enable-two-phase, failover-slot removal, and +that a new physical standby (node K) of the promoted subscriber starts cleanly. +""" + +import glob +import os +import re + +import pypg + +# Skip BEL, LF, and CR when generating database names from a character range. +_SKIP_CHARS = (7, 10, 13) + + +def _generate_db(node, prefix, from_char, to_char, suffix): + """Create a database whose name spans an ASCII range, return the name. + + Mirrors the generate_db helper extracted from 002_pg_upgrade.pl: build a + name from prefix + characters from_char..to_char (skipping BEL/LF/CR) + + suffix, then createdb it. 
On non-Windows the quotes/backslashes are kept + verbatim (the Windows IPC::Run quoting workaround does not apply here). + """ + dbname = prefix + for i in range(from_char, to_char + 1): + if i in _SKIP_CHARS: + continue + dbname += chr(i) + dbname += suffix + node.command_ok( + ["createdb", dbname], + "created database with ASCII characters from {} to {}".format( + from_char, to_char + ), + ) + return dbname + + +def _setup_publisher(create_pg): + """Init node P (publisher) and node F (about-to-fail, force_initdb).""" + node_p = create_pg("node_p", allows_streaming="logical", start=False) + # Disable autovacuum to avoid generating xid during stats update as otherwise + # the new XID could then be replicated to standby at some random point making + # slots at primary lag behind standby during slot sync. + node_p.append_conf("autovacuum = off") + node_p.start() + # Force node F to initialize a new cluster instead of copying a previously + # initdb'd cluster. A new cluster has a different system identifier so we can + # test that the target cluster is a copy of the source cluster. 
+ node_f = create_pg( + "node_f", force_initdb=True, allows_streaming="logical", start=False + ) + return node_p, node_f + + +def _mandatory_option_failures(pg_bin, datadir): + """pg_createsubscriber rejects missing/duplicate/mismatched arguments.""" + pg_bin.command_fails( + ["pg_createsubscriber"], "no subscriber data directory specified" + ) + pg_bin.command_fails( + ["pg_createsubscriber", "--pgdata", datadir], + "no publisher connection string specified", + ) + base = [ + "pg_createsubscriber", + "--verbose", + "--pgdata", + datadir, + "--publisher-server", + "port=5432", + ] + pg_bin.command_fails(base, "no database name specified") + pg_bin.command_fails( + base + ["--database", "pg1", "--database", "pg1"], + "duplicate database name", + ) + pg_bin.command_fails( + base + + [ + "--publication", + "foo1", + "--publication", + "foo1", + "--database", + "pg1", + "--database", + "pg2", + ], + "duplicate publication name", + ) + pg_bin.command_fails( + base + ["--publication", "foo1", "--database", "pg1", "--database", "pg2"], + "wrong number of publication names", + ) + pg_bin.command_fails( + base + + [ + "--publication", + "foo1", + "--publication", + "foo2", + "--subscription", + "bar1", + "--database", + "pg1", + "--database", + "pg2", + ], + "wrong number of subscription names", + ) + pg_bin.command_fails( + base + + [ + "--publication", + "foo1", + "--publication", + "foo2", + "--subscription", + "bar1", + "--subscription", + "bar2", + "--replication-slot", + "baz1", + "--database", + "pg1", + "--database", + "pg2", + ], + "wrong number of replication slot names", + ) + + +def _populate_publisher(node_p, db1, db2): + """Create tables, a row, and a physical slot on the publisher.""" + node_p.safe_psql("CREATE TABLE tbl1 (a text)", dbname=db1) + node_p.safe_psql("INSERT INTO tbl1 VALUES('first row')", dbname=db1) + node_p.safe_psql("CREATE TABLE tbl2 (a text)", dbname=db2) + slotname = "physical_slot" + node_p.safe_psql( + "SELECT 
pg_create_physical_replication_slot('{}')".format(slotname), + dbname=db2, + ) + return slotname + + +def _setup_standby(create_pg, node_p, slotname): + """Init node S as a streaming standby of node P, return it started.""" + pconnstr = node_p.connstr() + node_p.backup("backup_1") + node_s = create_pg( + "node_s", from_backup=(node_p, "backup_1"), has_streaming=True, start=False + ) + node_s.append_conf( + "\nprimary_slot_name = '{}'\n" + "primary_conninfo = '{} dbname=postgres'\n" + "hot_standby_feedback = on\n".format(slotname, pconnstr) + ) + node_s.set_standby_mode() + node_s.start() + return node_s + + +def _promoted_and_about_to_fail_checks(pg_bin, create_pg, node_p, node_s, db1, db2): + """Failures on a promoted server, a running standby, and a forced-init node.""" + node_t = create_pg( + "node_t", from_backup=(node_p, "backup_1"), has_streaming=True, start=False + ) + node_t.set_standby_mode() + node_t.start() + node_t.promote() + node_t.stop() + pg_bin.command_fails( + [ + "pg_createsubscriber", + "--verbose", + "--dry-run", + "--pgdata", + node_t.datadir, + "--publisher-server", + node_p.dbname_connstr(db1), + "--socketdir", + node_t.host, + "--subscriber-port", + node_t.port, + "--database", + db1, + "--database", + db2, + ], + "target server is not in recovery", + ) + pg_bin.command_fails( + [ + "pg_createsubscriber", + "--verbose", + "--dry-run", + "--pgdata", + node_s.datadir, + "--publisher-server", + node_p.dbname_connstr(db1), + "--socketdir", + node_s.host, + "--subscriber-port", + node_s.port, + "--database", + db1, + "--database", + db2, + ], + "standby is up and running", + ) + return node_t + + +def _about_to_fail_node_check(pg_bin, node_f, node_p, db1, db2): + """A node initialized as a fresh cluster is not a copy of the source.""" + pg_bin.command_fails( + [ + "pg_createsubscriber", + "--verbose", + "--pgdata", + node_f.datadir, + "--publisher-server", + node_p.dbname_connstr(db1), + "--socketdir", + node_f.host, + "--subscriber-port", + 
node_f.port, + "--database", + db1, + "--database", + db2, + ], + "subscriber data directory is not a copy of the source database cluster", + ) + + +def _cascading_standby_check(pg_bin, create_pg, node_s, db1, db2): + """pg_createsubscriber on a cascaded standby (P -> S -> C) fails.""" + node_s.backup("backup_2") + node_c = create_pg( + "node_c", from_backup=(node_s, "backup_2"), has_streaming=True, start=False + ) + node_c.adjust_conf("primary_slot_name", None) + node_c.set_standby_mode() + pg_bin.command_fails( + [ + "pg_createsubscriber", + "--verbose", + "--dry-run", + "--pgdata", + node_c.datadir, + "--publisher-server", + node_s.dbname_connstr(db1), + "--socketdir", + node_c.host, + "--subscriber-port", + node_c.port, + "--database", + db1, + "--database", + db2, + ], + "primary server is in recovery", + ) + + +def _unmet_conditions_checks(pg_bin, node_p, node_s, db1, db2): + """Unmet-condition failures on the primary and on the standby.""" + node_p.append_conf( + "\nmax_replication_slots = 1\n" + "max_wal_senders = 1\n" + "max_worker_processes = 2\n" + ) + node_p.restart() + node_s.stop() + pg_bin.command_fails( + [ + "pg_createsubscriber", + "--verbose", + "--dry-run", + "--pgdata", + node_s.datadir, + "--publisher-server", + node_p.dbname_connstr(db1), + "--socketdir", + node_s.host, + "--subscriber-port", + node_s.port, + "--database", + db1, + "--database", + db2, + ], + "primary contains unmet conditions on node P", + ) + # Restore default settings here but only apply it after testing standby. + # Some standby settings should not be a lower setting than on the primary. 
+ node_p.append_conf( + "\nmax_replication_slots = 10\n" + "max_wal_senders = 10\n" + "max_worker_processes = 8\n" + ) + node_s.append_conf( + "\nmax_active_replication_origins = 1\n" + "max_logical_replication_workers = 1\n" + "max_worker_processes = 2\n" + ) + pg_bin.command_fails( + [ + "pg_createsubscriber", + "--verbose", + "--dry-run", + "--pgdata", + node_s.datadir, + "--publisher-server", + node_p.dbname_connstr(db1), + "--socketdir", + node_s.host, + "--subscriber-port", + node_s.port, + "--database", + db1, + "--database", + db2, + ], + "standby contains unmet conditions on node S", + ) + node_s.append_conf( + "\nmax_active_replication_origins = 10\n" + "max_logical_replication_workers = 4\n" + "max_worker_processes = 8\n" + ) + node_p.restart() + + +def _prepare_objects_for_removal(node_p, node_s, db1): + """Create a failover slot, dummy subscription, and publications to remove.""" + fslotname = "failover_slot" + node_p.safe_psql( + "SELECT pg_create_logical_replication_slot(" + "'{}', 'pgoutput', false, false, true)".format(fslotname), + dbname=db1, + ) + node_s.start() + # Wait for the standby to catch up so that it is not lagging behind the + # failover slot. + node_p.wait_for_replay_catchup(node_s) + node_s.safe_psql("SELECT pg_sync_replication_slots()") + result = node_s.safe_psql( + "SELECT slot_name FROM pg_replication_slots " + "WHERE slot_name = '{}' AND synced AND NOT temporary".format(fslotname) + ) + assert result == "failover_slot", "failover slot is synced" + # Insert another row after syncing the logical slot (otherwise the local + # slot's xmin on the standby could be ahead of the remote slot, failing + # synchronization). 
+ node_p.safe_psql("INSERT INTO tbl1 VALUES('second row')", dbname=db1) + node_p.wait_for_replay_catchup(node_s) + dummy_sub = "regress_sub_dummy" + node_p.safe_psql( + "CREATE SUBSCRIPTION {} CONNECTION 'dbname=dummy' " + "PUBLICATION pub_dummy WITH (connect=false)".format(dummy_sub), + dbname=db1, + ) + node_p.wait_for_replay_catchup(node_s) + node_p.safe_psql( + "CREATE PUBLICATION test_pub1 FOR ALL TABLES;\n" + "CREATE PUBLICATION test_pub2 FOR ALL TABLES;", + dbname=db1, + ) + node_p.wait_for_replay_catchup(node_s) + assert ( + node_s.safe_psql("SELECT COUNT(*) FROM pg_publication", dbname=db1) == "2" + ), "two pre-existing publications on subscriber" + node_s.stop() + return fslotname, dummy_sub + + +def _dry_run_and_logdir(pg_bin, node_p, node_s, logdir, db1, db2): + """--dry-run on node S, checking the created log files and their contents.""" + pg_bin.command_ok( + [ + "pg_createsubscriber", + "--verbose", + "--dry-run", + "--recovery-timeout", + pypg.test_timeout_default(), + "--pgdata", + node_s.datadir, + "--publisher-server", + node_p.dbname_connstr(db1), + "--socketdir", + node_s.host, + "--subscriber-port", + node_s.port, + "--publication", + "pub1", + "--publication", + "pub2", + "--subscription", + "sub1", + "--subscription", + "sub2", + "--database", + db1, + "--database", + db2, + "--logdir", + logdir, + ], + "run pg_createsubscriber --dry-run on node S", + ) + server_log_files = glob.glob("{}/*/pg_createsubscriber_server.log".format(logdir)) + assert len(server_log_files) == 1, "pg_createsubscriber_server.log file was created" + assert ( + os.path.getsize(server_log_files[0]) != 0 + ), "pg_createsubscriber_server.log file not empty" + server_log = pypg.slurp_file(server_log_files[0]) + assert re.search( + r"consistent recovery state reached", server_log + ), "server reached consistent recovery state" + internal_log_files = glob.glob( + "{}/*/pg_createsubscriber_internal.log".format(logdir) + ) + assert ( + len(internal_log_files) == 1 + ), 
"pg_createsubscriber_internal.log file was created" + assert ( + os.path.getsize(internal_log_files[0]) != 0 + ), "pg_createsubscriber_internal.log file not empty" + internal_log = pypg.slurp_file(internal_log_files[0]) + assert re.search( + r"target server reached the consistent state", internal_log + ), "log shows consistent state reached" + node_s.start() + assert ( + node_s.safe_psql("SELECT pg_catalog.pg_is_in_recovery()") == "t" + ), "standby is in recovery" + node_s.stop() + + +def _no_databases_and_all_failures(pg_bin, node_p, node_s, db1): + """--dry-run without --database succeeds; --database/--publication + --all fail.""" + pg_bin.command_ok( + [ + "pg_createsubscriber", + "--verbose", + "--dry-run", + "--pgdata", + node_s.datadir, + "--publisher-server", + node_p.dbname_connstr(db1), + "--socketdir", + node_s.host, + "--subscriber-port", + node_s.port, + "--replication-slot", + "replslot1", + ], + "run pg_createsubscriber without --databases", + ) + pg_bin.command_fails_like( + [ + "pg_createsubscriber", + "--verbose", + "--pgdata", + node_s.datadir, + "--publisher-server", + node_p.dbname_connstr(db1), + "--socketdir", + node_s.host, + "--subscriber-port", + node_s.port, + "--database", + db1, + "--all", + ], + r"options --database and -a/--all cannot be used together", + "fail if --database is used with --all", + ) + pg_bin.command_fails_like( + [ + "pg_createsubscriber", + "--verbose", + "--dry-run", + "--pgdata", + node_s.datadir, + "--publisher-server", + node_p.dbname_connstr(db1), + "--socketdir", + node_s.host, + "--subscriber-port", + node_s.port, + "--all", + "--publication", + "pub1", + ], + r"options --publication and -a/--all cannot be used together", + "fail if --publication is used with --all", + ) + + +def _all_option_counts(pg_bin, node_p, node_s): + """--all dry-run reports objects for postgres + the two extra databases.""" + result = pg_bin.run_command( + [ + "pg_createsubscriber", + "--verbose", + "--dry-run", + "--recovery-timeout", 
+ pypg.test_timeout_default(), + "--pgdata", + node_s.datadir, + "--publisher-server", + node_p.connstr(), + "--socketdir", + node_s.host, + "--subscriber-port", + node_s.port, + "--all", + ] + ) + stderr = result.stderr + # The expected count 3 refers to postgres, db1 and db2 databases. + assert ( + len(re.findall(r"would create publication", stderr)) == 3 + ), "verify publications are created for all databases" + assert ( + len(re.findall(r"would create the replication slot", stderr)) == 3 + ), "verify replication slots are created for all databases" + assert ( + len(re.findall(r"would create subscription", stderr)) == 3 + ), "verify subscriptions are created for all databases" + + +def _run_on_standby(pg_bin, node_p, node_s, db1, db2): + """Convert node S with --enable-two-phase and --clean publications.""" + node_p.safe_psql( + "CREATE PUBLICATION test_pub3 FOR TABLE tbl1;\n" + "CREATE TABLE not_replicated (a int);", + dbname=db1, + ) + pg_bin.command_ok( + [ + "pg_createsubscriber", + "--verbose", + "--verbose", + "--recovery-timeout", + pypg.test_timeout_default(), + "--pgdata", + node_s.datadir, + "--publisher-server", + node_p.dbname_connstr(db1), + "--socketdir", + node_s.host, + "--subscriber-port", + node_s.port, + "--publication", + "test_pub3", + "--publication", + "pub2", + "--replication-slot", + "replslot1", + "--replication-slot", + "replslot2", + "--database", + db1, + "--database", + db2, + "--enable-two-phase", + "--clean", + "publications", + ], + "run pg_createsubscriber on node S", + ) + assert os.path.isfile( + node_s.datadir / "pg_createsubscriber.conf.disabled" + ), "pg_createsubscriber.conf.disabled exists in node S" + + +def _verify_results(node_p, node_s, db1, db2, slotname, dummy_sub, fslotname): + """Verify slot removal, replication content, two-phase, and publications.""" + result = node_p.safe_psql( + "SELECT count(*) FROM pg_replication_slots " + "WHERE slot_name = '{}'".format(slotname), + dbname=db1, + ) + assert ( + result == 
"0" + ), "the physical replication slot used as primary_slot_name has been removed" + node_p.safe_psql("INSERT INTO tbl1 VALUES('third row')", dbname=db1) + node_p.safe_psql("INSERT INTO tbl2 VALUES('row 1')", dbname=db2) + node_p.safe_psql("INSERT INTO not_replicated VALUES(0)", dbname=db1) + node_s.start() + assert ( + node_s.safe_psql("SELECT COUNT(*) FROM pg_publication", dbname=db1) == "0" + ), "all publications were removed from db1" + assert ( + node_s.safe_psql("SELECT COUNT(*) FROM pg_publication", dbname=db2) == "0" + ), "all publications were removed from db2" + assert ( + node_s.safe_psql( + "SELECT count(1) = 0 FROM pg_subscription WHERE subtwophasestate = 'd'" + ) + == "t" + ), "subscriptions are created with the two-phase option enabled" + result = node_s.safe_psql( + "SELECT count(*) FROM pg_subscription WHERE subname = '{}'".format(dummy_sub) + ) + assert result == "0", "pre-existing subscription was dropped" + result = node_s.safe_psql( + "SELECT subname FROM pg_subscription WHERE subname ~ '^pg_createsubscriber_'" + ) + subnames = result.split("\n") + node_s.wait_for_subscription_sync(node_p, subnames[0]) + node_s.wait_for_subscription_sync(node_p, subnames[1]) + result = node_s.safe_psql( + "SELECT count(*) FROM pg_replication_slots " + "WHERE slot_name = '{}'".format(fslotname), + dbname=db1, + ) + assert result == "0", "failover slot was removed" + assert ( + node_s.safe_psql("SELECT * FROM tbl1", dbname=db1) + == "first row\nsecond row\nthird row" + ), "logical replication works in database {}".format(db1) + assert ( + node_s.safe_psql("SELECT * FROM not_replicated", dbname=db1) == "" + ), "table is not replicated in database {}".format(db1) + assert ( + node_s.safe_psql("SELECT * FROM tbl2", dbname=db2) == "row 1" + ), "logical replication works in database {}".format(db2) + + +def _verify_identity_and_publications(node_p, node_s, db1, db2): + """System identifier changed; publications/subscriptions are correct.""" + sysid_p = 
node_p.safe_psql("SELECT system_identifier FROM pg_control_system()") + sysid_s = node_s.safe_psql("SELECT system_identifier FROM pg_control_system()") + assert sysid_p != sysid_s, "system identifier was changed" + assert ( + node_p.safe_psql( + "SELECT COUNT(*) FROM pg_publication WHERE pubname = 'pub2'", + dbname=db2, + ) + == "1" + ), "publication pub2 was created in {}".format(db2) + result = node_s.safe_psql( + "SELECT subname, subpublications FROM pg_subscription " + "WHERE subname ~ '^pg_createsubscriber_'\n" + "ORDER BY subpublications;" + ) + assert re.search( + r"^pg_createsubscriber_\d+_[0-9a-f]+ \|\{pub2\}\n" + r"\s*pg_createsubscriber_\d+_[0-9a-f]+ \|\{test_pub3\}$", + result, + re.VERBOSE, + ), "subscription and publication names are ok" + result = node_s.safe_psql( + "SELECT d.datname, s.subpublications\n" + "FROM pg_subscription s\n" + "JOIN pg_database d ON d.oid = s.subdbid\n" + "WHERE subname ~ '^pg_createsubscriber_'\n" + "ORDER BY s.subdbid" + ) + assert result == "{}|{{test_pub3}}\n{}|{{pub2}}".format( + db1, db2 + ), "subscriptions use the correct publications" + + +def _physical_standby_of_subscriber(pg_bin, create_pg, node_s, slotname): + """A new physical standby (node K) of the promoted subscriber starts cleanly.""" + sconnstr = node_s.connstr() + node_s.safe_psql( + "SELECT pg_create_physical_replication_slot('{}');".format(slotname) + ) + node_s.backup("backup_3") + node_k = create_pg( + "node_k", from_backup=(node_s, "backup_3"), has_streaming=True, start=False + ) + assert os.path.isfile( + node_k.datadir / "pg_createsubscriber.conf.disabled" + ), "pg_createsubscriber.conf.disabled exists in node K" + node_k.append_conf( + "\nprimary_slot_name = '{}'\n" + "primary_conninfo = '{} dbname=postgres'\n" + "hot_standby_feedback = on\n".format(slotname, sconnstr) + ) + node_k.set_standby_mode() + node_k_name = node_s.name + pg_bin.command_ok( + [ + "pg_ctl", + "--wait", + "--pgdata", + node_k.datadir, + "--log", + node_k.log, + "--options", 
+ "--cluster-name={}".format(node_k_name), + "start", + ], + "node K has started", + ) + # A direct pg_ctl stop is used rather than node.stop(), because the node's + # postmaster PID was not tracked (it was not started via node.start()). + pg_bin.run_command(["pg_ctl", "stop", "--pgdata", node_k.datadir]) + + +def test_040_pg_createsubscriber(create_pg, pg_bin, tmp_path): + """Convert a standby into a logical subscriber and verify the result.""" + pg_bin.program_help_ok("pg_createsubscriber") + pg_bin.program_version_ok("pg_createsubscriber") + pg_bin.program_options_handling_ok("pg_createsubscriber") + + datadir = str(tmp_path / "datadir") + logdir = str(tmp_path / "logdir") + os.mkdir(datadir) + os.mkdir(logdir) + + _mandatory_option_failures(pg_bin, datadir) + + node_p, node_f = _setup_publisher(create_pg) + db1 = _generate_db(node_p, 'regression\\"\\', 1, 45, '\\\\"\\\\\\') + db2 = _generate_db(node_p, "regression", 46, 90, "") + slotname = _populate_publisher(node_p, db1, db2) + node_s = _setup_standby(create_pg, node_p, slotname) + + _promoted_and_about_to_fail_checks(pg_bin, create_pg, node_p, node_s, db1, db2) + _about_to_fail_node_check(pg_bin, node_f, node_p, db1, db2) + _cascading_standby_check(pg_bin, create_pg, node_s, db1, db2) + _unmet_conditions_checks(pg_bin, node_p, node_s, db1, db2) + + fslotname, dummy_sub = _prepare_objects_for_removal(node_p, node_s, db1) + _dry_run_and_logdir(pg_bin, node_p, node_s, logdir, db1, db2) + _no_databases_and_all_failures(pg_bin, node_p, node_s, db1) + _all_option_counts(pg_bin, node_p, node_s) + _run_on_standby(pg_bin, node_p, node_s, db1, db2) + + _verify_results(node_p, node_s, db1, db2, slotname, dummy_sub, fslotname) + _verify_identity_and_publications(node_p, node_s, db1, db2) + _physical_standby_of_subscriber(pg_bin, create_pg, node_s, slotname) diff --git a/src/bin/pg_combinebackup/meson.build b/src/bin/pg_combinebackup/meson.build index a35b86f3f5987..e12e6ee83a090 100644 --- 
a/src/bin/pg_combinebackup/meson.build +++ b/src/bin/pg_combinebackup/meson.build @@ -26,6 +26,21 @@ tests += { 'name': 'pg_combinebackup', 'sd': meson.current_source_dir(), 'bd': meson.current_build_dir(), + 'pytest': { + 'tests': [ + 'pyt/test_001_basic.py', + 'pyt/test_005_integrity.py', + 'pyt/test_006_db_file_copy.py', + 'pyt/test_002_compare_backups.py', + 'pyt/test_003_timeline.py', + 'pyt/test_004_manifest.py', + 'pyt/test_007_wal_level_minimal.py', + 'pyt/test_008_promote.py', + 'pyt/test_009_no_full_file.py', + 'pyt/test_010_hardlink.py', + 'pyt/test_011_ib_truncation.py', + ], + }, 'tap': { 'tests': [ 't/001_basic.pl', diff --git a/src/bin/pg_combinebackup/pyt/test_001_basic.py b/src/bin/pg_combinebackup/pyt/test_001_basic.py new file mode 100644 index 0000000000000..7e789af841685 --- /dev/null +++ b/src/bin/pg_combinebackup/pyt/test_001_basic.py @@ -0,0 +1,26 @@ +# Copyright (c) 2022-2026, PostgreSQL Global Development Group + +# pylint: disable=line-too-long,too-many-statements +"""Port of src/bin/pg_combinebackup/t/001_basic.pl. + +pg_combinebackup argument validation: missing input directories and missing output directory each fail with the documented message. +Generated from the Perl original via .agent/gen_golden.py. 
+""" + + +def test_001_basic(pg_bin, tmp_path): + """pg_combinebackup argument validation.""" + tempdir = tmp_path + pg_bin.program_help_ok("pg_combinebackup") + pg_bin.program_version_ok("pg_combinebackup") + pg_bin.program_options_handling_ok("pg_combinebackup") + pg_bin.command_fails_like( + ["pg_combinebackup"], + r"""no input directories specified""", + "input directories must be specified", + ) + pg_bin.command_fails_like( + ["pg_combinebackup", str(tempdir)], + r"""no output directory specified""", + "output directory must be specified", + ) diff --git a/src/bin/pg_combinebackup/pyt/test_002_compare_backups.py b/src/bin/pg_combinebackup/pyt/test_002_compare_backups.py new file mode 100644 index 0000000000000..4f8fc1e35f476 --- /dev/null +++ b/src/bin/pg_combinebackup/pyt/test_002_compare_backups.py @@ -0,0 +1,198 @@ +# Copyright (c) 2021-2026, PostgreSQL Global Development Group + +# pylint: disable=line-too-long +"""Port of src/bin/pg_combinebackup/t/002_compare_backups.pl. + +A full backup and an incremental backup (taken after a variety of changes, +including tablespace activity) are each restored to a PITR standby at the same +LSN -- one directly from the full backup, the other by combining full + +incremental. The two restored clusters must be logically identical, as shown by +matching pg_dumpall output. +""" + +import os +import re +import tempfile + +import pypg + + +def _normalize(line_a, line_b): + # tspitr1 vs tspitr2 differ only in the trailing digit of the location. 
+ pat = re.compile(r"(create tablespace .* location .*\btspitr)[12]", re.IGNORECASE) + return pat.sub(r"\1N", line_a), pat.sub(r"\1N", line_b) + + +def test_002_compare_backups(create_pg): + """Direct-restore and combined-restore PITR clusters dump identically.""" + tempdir = tempfile.mkdtemp(prefix="cmpbk_") + mode = os.environ.get("PG_TEST_PG_COMBINEBACKUP_MODE") or "--copy" + primary = create_pg( + "primary", has_archiving=True, allows_streaming=True, start=False + ) + primary.append_conf("summarize_wal = on") + primary.start() + tsprimary = tempdir + "/ts" + os.mkdir(tsprimary) + primary.safe_psql( + "CREATE TABLE will_change (a int, b text);\n" + "INSERT INTO will_change VALUES (1, 'initial test row');\n" + "CREATE TABLE will_grow (a int, b text);\n" + "INSERT INTO will_grow VALUES (1, 'initial test row');\n" + "CREATE TABLE will_shrink (a int, b text);\n" + "INSERT INTO will_shrink VALUES (1, 'initial test row');\n" + "CREATE TABLE will_get_vacuumed (a int, b text);\n" + "INSERT INTO will_get_vacuumed VALUES (1, 'initial test row');\n" + "CREATE TABLE will_get_dropped (a int, b text);\n" + "INSERT INTO will_get_dropped VALUES (1, 'initial test row');\n" + "CREATE TABLE will_get_rewritten (a int, b text);\n" + "INSERT INTO will_get_rewritten VALUES (1, 'initial test row');\n" + "CREATE DATABASE db_will_get_dropped;\n" + "CREATE TABLESPACE ts1 LOCATION '{}';\n" + "CREATE TABLE will_not_change_in_ts (a int, b text) TABLESPACE ts1;\n" + "INSERT INTO will_not_change_in_ts VALUES (1, 'initial test row');\n" + "CREATE TABLE will_change_in_ts (a int, b text) TABLESPACE ts1;\n" + "INSERT INTO will_change_in_ts VALUES (1, 'initial test row');\n" + "CREATE TABLE will_get_dropped_in_ts (a int, b text);\n" + "INSERT INTO will_get_dropped_in_ts VALUES (1, 'initial test row');".format( + tsprimary + ) + ) + tsoids = [ + d + for d in pypg.slurp_dir("{}/pg_tblspc".format(primary.datadir)) + if d and d[0].isdigit() + ] + assert len(tsoids) == 1, "exactly one user-defined 
tablespace" + tsoid = tsoids[0] + backup1path = "{}/backup1".format(primary.backup_dir) + tsbackup1path = tempdir + "/ts1backup" + os.mkdir(tsbackup1path) + primary.command_ok( + [ + "pg_basebackup", + "--no-sync", + "--pgdata", + backup1path, + "--checkpoint", + "fast", + "--tablespace-mapping", + "{}={}".format(tsprimary, tsbackup1path), + ], + "full backup", + ) + primary.safe_psql( + "UPDATE will_change SET b = 'modified value' WHERE a = 1;\n" + "UPDATE will_change_in_ts SET b = 'modified value' WHERE a = 1;\n" + "INSERT INTO will_grow SELECT g, 'additional row' " + "FROM generate_series(2, 5000) g;\n" + "TRUNCATE will_shrink;\n" + "VACUUM will_get_vacuumed;\n" + "DROP TABLE will_get_dropped;\n" + "DROP TABLE will_get_dropped_in_ts;\n" + "CREATE TABLE newly_created (a int, b text);\n" + "INSERT INTO newly_created VALUES (1, 'row for new table');\n" + "CREATE TABLE newly_created_in_ts (a int, b text) TABLESPACE ts1;\n" + "INSERT INTO newly_created_in_ts VALUES (1, 'row for new table');\n" + "VACUUM FULL will_get_rewritten;\n" + "DROP DATABASE db_will_get_dropped;\n" + "CREATE DATABASE db_newly_created;" + ) + backup2path = "{}/backup2".format(primary.backup_dir) + tsbackup2path = tempdir + "/tsbackup2" + os.mkdir(tsbackup2path) + primary.command_ok( + [ + "pg_basebackup", + "--no-sync", + "--pgdata", + backup2path, + "--checkpoint", + "fast", + "--tablespace-mapping", + "{}={}".format(tsprimary, tsbackup2path), + "--incremental", + backup1path + "/backup_manifest", + ], + "incremental backup", + ) + lsn = primary.safe_psql("SELECT pg_current_wal_lsn();") + primary.safe_psql("SELECT txid_current();") + primary.safe_psql("SELECT pg_switch_wal()") + assert primary.poll_query_until( + "SELECT pg_walfile_name('{}') <= last_archived_wal " + "FROM pg_stat_archiver;".format(lsn) + ), "Timed out while waiting for WAL segment to be archived" + tspitr1path = tempdir + "/tspitr1" + pitr1 = create_pg( + "pitr1", + from_backup=(primary, "backup1"), + standby=True, + 
has_restoring=True, + tablespace_map={tsoid: tspitr1path}, + start=False, + ) + pitr1.append_conf( + "\nrecovery_target_lsn = '{}'\n" + "recovery_target_action = 'promote'\narchive_mode = 'off'\n".format(lsn) + ) + pitr1.start() + tspitr2path = tempdir + "/tspitr2" + pitr2 = create_pg( + "pitr2", + from_backup=(primary, "backup2"), + standby=True, + has_restoring=True, + combine_with_prior=["backup1"], + tablespace_map={tsbackup2path: tspitr2path}, + combine_mode=mode, + start=False, + ) + pitr2.append_conf( + "\nrecovery_target_lsn = '{}'\n" + "recovery_target_action = 'promote'\narchive_mode = 'off'\n".format(lsn) + ) + pitr2.start() + assert pitr1.poll_query_until( + "SELECT NOT pg_is_in_recovery();" + ), "Timed out while waiting apply to reach LSN {}".format(lsn) + assert pitr2.poll_query_until( + "SELECT NOT pg_is_in_recovery();" + ), "Timed out while waiting apply to reach LSN {}".format(lsn) + backupdir = primary.backup_dir + dump1 = "{}/pitr1.dump".format(backupdir) + dump2 = "{}/pitr2.dump".format(backupdir) + pitr1.command_ok( + [ + "pg_dumpall", + "--restrict-key", + "test", + "--no-sync", + "--no-unlogged-table-data", + "--file", + dump1, + "--dbname", + pitr1.connstr("postgres"), + ], + "dump from PITR 1", + ) + pitr2.command_ok( + [ + "pg_dumpall", + "--restrict-key", + "test", + "--no-sync", + "--no-unlogged-table-data", + "--file", + dump2, + "--dbname", + pitr2.connstr("postgres"), + ], + "dump from PITR 2", + ) + pypg.compare_files( + dump1, dump2, "contents of dumps match for both PITRs", _normalize + ) + pitr1.stop() + pitr2.stop() + primary.stop() diff --git a/src/bin/pg_combinebackup/pyt/test_003_timeline.py b/src/bin/pg_combinebackup/pyt/test_003_timeline.py new file mode 100644 index 0000000000000..9dec85d112081 --- /dev/null +++ b/src/bin/pg_combinebackup/pyt/test_003_timeline.py @@ -0,0 +1,87 @@ +# Copyright (c) 2021-2026, PostgreSQL Global Development Group + +"""Port of src/bin/pg_combinebackup/t/003_timeline.pl. 
+ +A chain of full + incremental backups taken across a branch (a second node +restored from the combined chain and written to) combines correctly: a third +node restored from full+backup2+backup3 sees exactly the rows that belong on its +branch, and every backup in the chain verifies. +""" + +import os + + +def test_003_timeline(create_pg): + """pg_combinebackup correctly follows a full+incremental backup chain.""" + mode = os.environ.get("PG_TEST_PG_COMBINEBACKUP_MODE") or "--copy" + node1 = create_pg("node1", has_archiving=True, allows_streaming=True, start=False) + node1.append_conf("summarize_wal = on") + node1.start() + node1.safe_psql( + "CREATE TABLE mytable (a int, b text);\n" + "INSERT INTO mytable VALUES (1, 'aardvark');" + ) + backup1path = "{}/backup1".format(node1.backup_dir) + node1.command_ok( + ["pg_basebackup", "--pgdata", backup1path, "--no-sync", "--checkpoint", "fast"], + "full backup from node1", + ) + node1.safe_psql("INSERT INTO mytable VALUES (2, 'beetle');") + backup2path = "{}/backup2".format(node1.backup_dir) + node1.command_ok( + [ + "pg_basebackup", + "--pgdata", + backup2path, + "--no-sync", + "--checkpoint", + "fast", + "--incremental", + backup1path + "/backup_manifest", + ], + "incremental backup from node1", + ) + node2 = create_pg( + "node2", + from_backup=(node1, "backup2"), + combine_with_prior=["backup1"], + start=False, + ) + node2.start() + node1.safe_psql("INSERT INTO mytable VALUES (3, 'crab');") + node2.safe_psql("INSERT INTO mytable VALUES (4, 'dingo');") + backup3path = "{}/backup3".format(node1.backup_dir) + node2.command_ok( + [ + "pg_basebackup", + "--pgdata", + backup3path, + "--no-sync", + "--checkpoint", + "fast", + "--incremental", + backup2path + "/backup_manifest", + ], + "incremental backup from node2", + ) + node3 = create_pg( + "node3", + from_backup=(node1, "backup3"), + combine_with_prior=["backup1", "backup2"], + combine_mode=mode, + start=False, + ) + node3.start() + node3.safe_psql("INSERT INTO mytable 
VALUES (5, 'elephant');") + result = node3.safe_psql( + "select string_agg(a::text, ':'), string_agg(b, ':') from mytable;" + ) + assert result == "1:2:4:5|aardvark:beetle:dingo:elephant" + for backup_name in ("backup1", "backup2", "backup3"): + node1.command_ok( + ["pg_verifybackup", "{}/{}".format(node1.backup_dir, backup_name)], + "verify backup {}".format(backup_name), + ) + node3.stop() + node2.stop() + node1.stop() diff --git a/src/bin/pg_combinebackup/pyt/test_004_manifest.py b/src/bin/pg_combinebackup/pyt/test_004_manifest.py new file mode 100644 index 0000000000000..914ddcce20ed5 --- /dev/null +++ b/src/bin/pg_combinebackup/pyt/test_004_manifest.py @@ -0,0 +1,65 @@ +# Copyright (c) 2021-2026, PostgreSQL Global Development Group + +"""Port of src/bin/pg_combinebackup/t/004_manifest.pl. + +pg_combinebackup manifest options: --no-manifest produces an unverifiable +backup; --manifest-checksums=NONE/SHA224 control the manifest's checksum +algorithm (SHA224 appears throughout, none omits Checksum-Algorithm), and each +combined backup verifies as expected. 
+""" + +import os +import re + +import pypg + + +def _combine_and_test(node, original, backup_name, failure_pattern, extra): + revised = "{}/{}".format(node.backup_dir, backup_name) + node.command_ok( + ["pg_combinebackup", original, "--output", revised, "--no-sync"] + extra, + "pg_combinebackup with {}".format(" ".join(extra)), + ) + if failure_pattern is not None: + node.command_fails_like( + ["pg_verifybackup", revised], + failure_pattern, + "unable to verify backup {}".format(backup_name), + ) + else: + node.command_ok( + ["pg_verifybackup", revised], "verify backup {}".format(backup_name) + ) + + +def test_004_manifest(create_pg): + """pg_combinebackup --no-manifest and --manifest-checksums behavior.""" + mode = os.environ.get("PG_TEST_PG_COMBINEBACKUP_MODE") or "--copy" + node = create_pg("node", has_archiving=True, allows_streaming=True) + original = "{}/original".format(node.backup_dir) + node.command_ok( + ["pg_basebackup", "--pgdata", original, "--no-sync", "--checkpoint", "fast"], + "full backup", + ) + node.command_ok(["pg_verifybackup", original], "verify original backup") + _combine_and_test( + node, + original, + "nomanifest", + r"could not open file.*backup_manifest", + ["--no-manifest"], + ) + _combine_and_test( + node, original, "csum_none", None, ["--manifest-checksums=NONE", mode] + ) + _combine_and_test( + node, original, "csum_sha224", None, ["--manifest-checksums=SHA224", mode] + ) + sha224 = pypg.slurp_file("{}/csum_sha224/backup_manifest".format(node.backup_dir)) + assert ( + len(re.findall(r"SHA224", sha224, re.IGNORECASE)) > 100 + ), "SHA224 is mentioned many times in SHA224 manifest" + nocsum = pypg.slurp_file("{}/csum_none/backup_manifest".format(node.backup_dir)) + assert ( + len(re.findall(r"Checksum-Algorithm", nocsum, re.IGNORECASE)) == 0 + ), "Checksum-Algorithm is not mentioned in no-checksum manifest" diff --git a/src/bin/pg_combinebackup/pyt/test_005_integrity.py b/src/bin/pg_combinebackup/pyt/test_005_integrity.py new file mode 
100644 index 0000000000000..2167c96828df5 --- /dev/null +++ b/src/bin/pg_combinebackup/pyt/test_005_integrity.py @@ -0,0 +1,166 @@ +# Copyright (c) 2021-2026, PostgreSQL Global Development Group + +"""Port of src/bin/pg_combinebackup/t/005_integrity.pl. + +pg_combinebackup integrity checks: it rejects combining two full backups, a +non-full first backup, backups from different nodes (system identifier +mismatch), a manifest whose system identifier disagrees with the control file, +an omitted required backup, backups given out of order, and a synthetic backup +re-combined with an already-included incremental -- while accepting valid +full+incremental chains (including stepwise synthetic combination). +""" + +import os +import shutil + + +def _fails(node, args, pattern, msg, mode): + node.command_fails_like(["pg_combinebackup"] + args + [mode], pattern, msg) + + +def test_005_integrity(create_pg): + """pg_combinebackup rejects malformed chains and accepts valid ones.""" + mode = os.environ.get("PG_TEST_PG_COMBINEBACKUP_MODE") or "--copy" + node1 = create_pg("node1", has_archiving=True, allows_streaming=True, start=False) + node1.append_conf("summarize_wal = on") + node1.start() + with open("{}/INCREMENTAL.config".format(node1.datadir), "w", encoding="utf-8"): + pass + node2 = create_pg( + "node2", + force_initdb=True, + has_archiving=True, + allows_streaming=True, + start=False, + ) + node2.append_conf("summarize_wal = on") + node2.start() + bdir = node1.backup_dir + backup1 = "{}/backup1".format(bdir) + backup2 = "{}/backup2".format(bdir) + backup3 = "{}/backup3".format(bdir) + other1 = "{}/backupother1".format(bdir) + other2 = "{}/backupother2".format(bdir) + result = "{}/result".format(bdir) + node1.command_ok( + ["pg_basebackup", "--pgdata", backup1, "--no-sync", "--checkpoint", "fast"], + "full backup from node1", + ) + node1.command_ok( + [ + "pg_basebackup", + "--pgdata", + backup2, + "--no-sync", + "--checkpoint", + "fast", + "--incremental", + backup1 + 
"/backup_manifest", + ], + "incremental backup from node1", + ) + node1.command_ok( + [ + "pg_basebackup", + "--pgdata", + backup3, + "--no-sync", + "--checkpoint", + "fast", + "--incremental", + backup2 + "/backup_manifest", + ], + "another incremental backup from node1", + ) + node2.command_ok( + ["pg_basebackup", "--pgdata", other1, "--no-sync", "--checkpoint", "fast"], + "full backup from node2", + ) + node2.command_ok( + [ + "pg_basebackup", + "--pgdata", + other2, + "--no-sync", + "--checkpoint", + "fast", + "--incremental", + other1 + "/backup_manifest", + ], + "incremental backup from node2", + ) + _fails( + node1, + [backup1, backup1, "--output", result], + r"is a full backup, but only the first backup should be a full backup", + "can't combine full backups", + mode, + ) + _fails( + node1, + [backup2, backup2, "--output", result], + r"is an incremental backup, but the first backup should be a full backup", + "can't combine full backups", + mode, + ) + _fails( + node1, + [backup1, other2, "--output", result], + r"expected system identifier.*but found", + "can't combine backups from different nodes", + mode, + ) + os.rename( + "{}/backup_manifest".format(backup2), "{}/backup_manifest.orig".format(backup2) + ) + shutil.copy( + "{}/backup_manifest".format(other2), "{}/backup_manifest".format(backup2) + ) + _fails( + node1, + [backup1, backup2, backup3, "--output", result], + r" manifest system identifier is .*, but control file has ", + "can't combine backups with different manifest system identifier ", + mode, + ) + shutil.move( + "{}/backup_manifest.orig".format(backup2), "{}/backup_manifest".format(backup2) + ) + _fails( + node1, + [backup1, backup3, "--output", result], + r"starts at LSN.*but expected", + "can't omit a required backup", + mode, + ) + _fails( + node1, + [backup1, backup3, backup2, "--output", result], + r"starts at LSN.*but expected", + "can't combine backups in the wrong order", + mode, + ) + node1.command_ok( + ["pg_combinebackup", 
backup1, backup2, backup3, "--output", result, mode], + "can combine 3 matching backups", + ) + shutil.rmtree(result) + synthetic12 = "{}/synthetic12".format(bdir) + node1.command_ok( + ["pg_combinebackup", backup1, backup2, "--output", synthetic12, mode], + "can combine 2 matching backups", + ) + node1.command_ok( + ["pg_combinebackup", synthetic12, backup3, "--output", result, mode], + "can combine synthetic backup with later incremental", + ) + shutil.rmtree(result) + _fails( + node1, + [synthetic12, backup2, "--output", result], + r"starts at LSN.*but expected", + "can't combine synthetic backup with included incremental", + mode, + ) + node1.stop() + node2.stop() diff --git a/src/bin/pg_combinebackup/pyt/test_006_db_file_copy.py b/src/bin/pg_combinebackup/pyt/test_006_db_file_copy.py new file mode 100644 index 0000000000000..a1b980c22b5e3 --- /dev/null +++ b/src/bin/pg_combinebackup/pyt/test_006_db_file_copy.py @@ -0,0 +1,60 @@ +# Copyright (c) 2021-2026, PostgreSQL Global Development Group + +# pylint: disable=implicit-str-concat +"""Port of src/bin/pg_combinebackup/t/006_db_file_copy.pl. + +When a database is dropped and recreated with the same OID (via FILE_COPY +strategy) between a full and an incremental backup, pg_combinebackup must +reconstruct the *recreated* (empty) database, not the original: the table that +existed in the pre-drop database is absent after combining and restoring. 
+""" + +import os +import re + + +def test_006_db_file_copy(create_pg): + """A reused-OID FILE_COPY database is reconstructed empty after combine.""" + mode = os.environ.get("PG_TEST_PG_COMBINEBACKUP_MODE") or "--copy" + primary = create_pg( + "primary", has_archiving=True, allows_streaming=True, start=False + ) + primary.append_conf("summarize_wal = on") + primary.start() + primary.safe_psql("CREATE DATABASE lakh OID = 100000 STRATEGY = FILE_COPY") + primary.safe_psql("CREATE TABLE t1 (a int)", dbname="lakh") + backup1path = "{}/backup1".format(primary.backup_dir) + primary.command_ok( + ["pg_basebackup", "--pgdata", backup1path, "--no-sync", "--checkpoint", "fast"], + "full backup", + ) + primary.safe_psql( + "DROP DATABASE lakh;\n" "CREATE DATABASE lakh OID = 100000 STRATEGY = FILE_COPY" + ) + backup2path = "{}/backup2".format(primary.backup_dir) + primary.command_ok( + [ + "pg_basebackup", + "--pgdata", + backup2path, + "--no-sync", + "--checkpoint", + "fast", + "--incremental", + backup1path + "/backup_manifest", + ], + "incremental backup", + ) + restore = create_pg( + "restore", + from_backup=(primary, "backup2"), + combine_with_prior=["backup1"], + combine_mode=mode, + start=False, + ) + restore.start() + res = restore.psql_capture("SELECT * FROM t1", dbname="lakh") + assert res.stdout == "", "SELECT * FROM t1: no stdout" + assert re.search( + r'relation "t1" does not exist', res.stderr + ), "SELECT * FROM t1: stderr missing table" diff --git a/src/bin/pg_combinebackup/pyt/test_007_wal_level_minimal.py b/src/bin/pg_combinebackup/pyt/test_007_wal_level_minimal.py new file mode 100644 index 0000000000000..45a73753bacee --- /dev/null +++ b/src/bin/pg_combinebackup/pyt/test_007_wal_level_minimal.py @@ -0,0 +1,52 @@ +# Copyright (c) 2021-2026, PostgreSQL Global Development Group + +"""Port of src/bin/pg_combinebackup/t/007_wal_level_minimal.pl. 
+ +An incremental backup cannot be taken across a window where the server ran at +wal_level=minimal (with WAL summarization off): the WAL summaries needed since +the full backup are incomplete, so pg_basebackup --incremental fails. +""" + + +def test_007_wal_level_minimal(create_pg): + """Incremental backup fails when WAL summaries are incomplete (minimal level).""" + node1 = create_pg("node1", allows_streaming=True, start=False) + node1.append_conf("summarize_wal = on\nwal_keep_size = '1GB'\n") + node1.start() + node1.safe_psql( + "CREATE TABLE mytable (a int, b text);\n" + "INSERT INTO mytable VALUES (1, 'finch');" + ) + backup1path = "{}/backup1".format(node1.backup_dir) + node1.command_ok( + ["pg_basebackup", "--pgdata", backup1path, "--no-sync", "--checkpoint", "fast"], + "full backup", + ) + node1.safe_psql( + "ALTER SYSTEM SET wal_level = minimal;\n" + "ALTER SYSTEM SET max_wal_senders = 0;\n" + "ALTER SYSTEM SET summarize_wal = off;" + ) + node1.restart() + node1.safe_psql("INSERT INTO mytable VALUES (2, 'gerbil');") + node1.safe_psql( + "ALTER SYSTEM RESET wal_level;\n" + "ALTER SYSTEM RESET max_wal_senders;\n" + "ALTER SYSTEM RESET summarize_wal;" + ) + node1.restart() + backup2path = "{}/backup2".format(node1.backup_dir) + node1.command_fails_like( + [ + "pg_basebackup", + "--pgdata", + backup2path, + "--no-sync", + "--checkpoint", + "fast", + "--incremental", + backup1path + "/backup_manifest", + ], + r"WAL summaries are required on timeline 1 from.*are incomplete", + "incremental backup fails", + ) diff --git a/src/bin/pg_combinebackup/pyt/test_008_promote.py b/src/bin/pg_combinebackup/pyt/test_008_promote.py new file mode 100644 index 0000000000000..eac5247527a35 --- /dev/null +++ b/src/bin/pg_combinebackup/pyt/test_008_promote.py @@ -0,0 +1,61 @@ +# Copyright (c) 2021-2026, PostgreSQL Global Development Group + +"""Port of src/bin/pg_combinebackup/t/008_promote.pl. 
+ +An incremental backup can be taken from a promoted standby (on a new timeline) +relative to a full backup from the original primary, and pg_combinebackup can +reconstruct a usable data directory from that full + incremental chain. +""" + + +def test_008_promote(create_pg): + """Incremental backup from a promoted standby combines into a usable cluster.""" + node1 = create_pg("node1", has_archiving=True, allows_streaming=True, start=False) + node1.append_conf("summarize_wal = on") + node1.append_conf("log_min_messages = debug1") + node1.start() + node1.safe_psql( + "CREATE TABLE mytable (a int, b text);\n" + "INSERT INTO mytable VALUES (1, 'avocado');" + ) + backup1path = "{}/backup1".format(node1.backup_dir) + node1.command_ok( + ["pg_basebackup", "--pgdata", backup1path, "--no-sync", "--checkpoint", "fast"], + "full backup from node1", + ) + node1.safe_psql("CHECKPOINT") + lsn = node1.safe_psql("SELECT pg_current_wal_insert_lsn()") + node1.safe_psql("INSERT INTO mytable VALUES (2, 'beetle');") + node2 = create_pg( + "node2", from_backup=(node1, "backup1"), has_streaming=True, start=False + ) + node2.append_conf( + "recovery_target_lsn = '{}'\nrecovery_target_action = 'pause'\n".format(lsn) + ) + node2.start() + node2.poll_query_until("SELECT pg_get_wal_replay_pause_state() = 'paused';") + node2.safe_psql("SELECT pg_promote()") + node2.poll_query_until("SELECT pg_is_in_recovery() = 'f';") + node2.safe_psql("INSERT INTO mytable VALUES (2, 'blackberry');") + backup2path = "{}/backup2".format(node1.backup_dir) + node2.command_ok( + [ + "pg_basebackup", + "--pgdata", + backup2path, + "--no-sync", + "--checkpoint", + "fast", + "--incremental", + backup1path + "/backup_manifest", + ], + "incremental backup from node2", + ) + node3 = create_pg( + "node3", + from_backup=(node1, "backup2"), + combine_with_prior=["backup1"], + start=False, + ) + node3.start() + node3.stop() diff --git a/src/bin/pg_combinebackup/pyt/test_009_no_full_file.py 
b/src/bin/pg_combinebackup/pyt/test_009_no_full_file.py new file mode 100644 index 0000000000000..02b85f083b002 --- /dev/null +++ b/src/bin/pg_combinebackup/pyt/test_009_no_full_file.py @@ -0,0 +1,62 @@ +# Copyright (c) 2021-2026, PostgreSQL Global Development Group + +"""Port of src/bin/pg_combinebackup/t/009_no_full_file.pl. + +pg_combinebackup must reject a chain where the supposed full backup actually +contains an incremental file: after corrupting the full backup so a file is +replaced by its INCREMENTAL.* counterpart, combining fails with a clear error. +""" + +import os +import shutil + +import pypg + + +def test_009_no_full_file(create_pg): + """pg_combinebackup fails when the full backup contains an incremental file.""" + primary = create_pg( + "primary", has_archiving=True, allows_streaming=True, start=False + ) + primary.append_conf("summarize_wal = on") + primary.start() + backup1path = "{}/backup1".format(primary.backup_dir) + primary.command_ok( + ["pg_basebackup", "--pgdata", backup1path, "--no-sync", "--checkpoint", "fast"], + "full backup", + ) + backup2path = "{}/backup2".format(primary.backup_dir) + primary.command_ok( + [ + "pg_basebackup", + "--pgdata", + backup2path, + "--no-sync", + "--checkpoint", + "fast", + "--incremental", + backup1path + "/backup_manifest", + ], + "incremental backup", + ) + filelist = [ + f + for f in pypg.slurp_dir("{}/base/1".format(backup2path)) + if f.startswith("INCREMENTAL.") + ] + for iname in filelist: + name = iname[len("INCREMENTAL.") :] + full_file = "{}/base/1/{}".format(backup1path, name) + if os.path.isfile(full_file): + shutil.copy( + "{}/base/1/{}".format(backup2path, iname), + "{}/base/1/{}".format(backup1path, iname), + ) + os.unlink(full_file) + break + outpath = "{}/out".format(primary.backup_dir) + primary.command_fails_like( + ["pg_combinebackup", backup1path, backup2path, "--output", outpath], + r"full backup contains unexpected incremental file", + "pg_combinebackup fails", + ) diff --git 
a/src/bin/pg_combinebackup/pyt/test_010_hardlink.py b/src/bin/pg_combinebackup/pyt/test_010_hardlink.py new file mode 100644 index 0000000000000..c8417501e445f --- /dev/null +++ b/src/bin/pg_combinebackup/pyt/test_010_hardlink.py @@ -0,0 +1,95 @@ +# Copyright (c) 2025-2026, PostgreSQL Global Development Group + +"""Port of src/bin/pg_combinebackup/t/010_hardlink.pl. + +pg_combinebackup --link reconstructs a data directory by hard-linking unchanged +relation file segments from the prior backup: every segment except the last of +a relation has 2 hard links (shared with the backup), and the last segment's +link count reflects whether that segment was rewritten in the incremental. +""" + +import os + + +def _hard_link_count(path): + return os.stat(path).st_nlink + + +def _check_data_file(data_file, last_segment_nlinks): + segments = [data_file] + n = 1 + while os.path.isfile("{}.{}".format(data_file, n)): + segments.append("{}.{}".format(data_file, n)) + n += 1 + last = segments.pop() + for segment in segments: + assert _hard_link_count(segment) == 2, "File '{}' has 2 hard links".format( + segment + ) + assert ( + _hard_link_count(last) == last_segment_nlinks + ), "File '{}' has {} hard link(s)".format(last, last_segment_nlinks) + + +def test_010_hardlink(create_pg): + """pg_combinebackup --link hard-links unchanged relation segments.""" + create_query = ( + "CREATE TABLE test_{0} AS\n" + " SELECT x.id::bigint,\n" + " repeat('a', 1600) AS value\n" + " FROM generate_series(1, 100) AS x(id);" + ) + path_query = ( + "SELECT pg_relation_filepath(oid) FROM pg_class WHERE relname = 'test_{0}';" + ) + primary = create_pg( + "primary", has_archiving=True, allows_streaming=True, start=False + ) + primary.append_conf("summarize_wal = on") + primary.append_conf("autovacuum = off") + primary.start() + primary.safe_psql(create_query.format("1")) + primary.safe_psql(create_query.format("2")) + test_1_path = primary.safe_psql(path_query.format("1")) + test_2_path = 
primary.safe_psql(path_query.format("2")) + backup1path = "{}/backup1".format(primary.backup_dir) + primary.command_ok( + [ + "pg_basebackup", + "--pgdata", + backup1path, + "--no-sync", + "--checkpoint", + "fast", + "--wal-method", + "none", + ], + "full backup", + ) + primary.safe_psql("INSERT INTO test_2 (id, value) VALUES (101, repeat('a', 1600));") + backup2path = "{}/backup2".format(primary.backup_dir) + primary.command_ok( + [ + "pg_basebackup", + "--pgdata", + backup2path, + "--no-sync", + "--checkpoint", + "fast", + "--wal-method", + "none", + "--incremental", + backup1path + "/backup_manifest", + ], + "incremental backup", + ) + restore = create_pg( + "restore", + from_backup=(primary, "backup2"), + combine_with_prior=["backup1"], + combine_mode="--link", + start=False, + ) + _check_data_file("{}/{}".format(restore.datadir, test_1_path), 2) + _check_data_file("{}/{}".format(restore.datadir, test_2_path), 1) + primary.stop() diff --git a/src/bin/pg_combinebackup/pyt/test_011_ib_truncation.py b/src/bin/pg_combinebackup/pyt/test_011_ib_truncation.py new file mode 100644 index 0000000000000..59f4993c14a2e --- /dev/null +++ b/src/bin/pg_combinebackup/pyt/test_011_ib_truncation.py @@ -0,0 +1,66 @@ +# Copyright (c) 2022-2026, PostgreSQL Global Development Group + +# pylint: disable=line-too-long,too-many-statements +"""Port of src/bin/pg_combinebackup/t/011_ib_truncation.pl. + +Incremental backup across a relation truncation: pg_combinebackup correctly reconstructs a table that was truncated between the full and incremental backups (post-truncation block/row counts are preserved in the restored cluster). +Generated from the Perl original via .agent/gen_golden.py. 
+""" + + +def test_011_ib_truncation(create_pg): + """Incremental backup reconstructs a relation truncated between backups.""" + primary = create_pg("primary", allows_streaming=True, start=False) + primary.append_conf("summarize_wal = on") + primary.start() + backup_path = primary.backup_dir + full_backup = str(backup_path) + "/full" + target_blocks = 6 + block_size = primary.safe_psql("SELECT current_setting('block_size')::int;") + target_rows = int(target_blocks + 2) + rows_after_truncation = int(target_rows - 1) + primary.safe_psql( + "CREATE TABLE t (\n id int,\n data text STORAGE PLAIN\n ) WITH (autovacuum_enabled = false);" + ) + primary.safe_psql( + "INSERT INTO t\n SELECT i,\n repeat('0123456789ABCDEF0123456789ABCDEF', (" + + str(block_size) + + " / (2*32)))\n FROM generate_series(1, " + + str(target_rows) + + ") i;" + ) + primary.safe_psql("VACUUM t;") + t_blocks = primary.safe_psql( + "SELECT pg_relation_size('t') / current_setting('block_size')::int;" + ) + assert int(t_blocks) > int(target_blocks), "target block size exceeded" + primary.backup("full") + primary.safe_psql("DELETE FROM t WHERE id > (" + str(rows_after_truncation) + ");") + primary.safe_psql("VACUUM (TRUNCATE) t;") + t_blocks = primary.safe_psql( + "SELECT pg_relation_size('t') / current_setting('block_size')::int;" + ) + assert t_blocks == str( + rows_after_truncation + ), "post-truncation row count as expected" + assert int(t_blocks) > int(target_blocks), "post-truncation block count as expected" + primary.backup( + "incr", backup_options=["--incremental", str(full_backup) + "/backup_manifest"] + ) + relfilenode = primary.safe_psql("SELECT pg_relation_filenode('t');") + vm_limits = primary.safe_psql( + "SELECT string_agg(relblocknumber::text, ',')\n\t FROM pg_available_wal_summaries() s,\n\t pg_wal_summary_contents(s.tli, s.start_lsn, s.end_lsn) c\n\t WHERE c.relfilenode = " + + str(relfilenode) + + "\n\t AND c.relforknumber = 2\n\t AND c.is_limit_block;" + ) + assert vm_limits == "1", 
"WAL summary has correct VM fork truncation limit" + restored = create_pg( + "node2", from_backup=(primary, "incr"), combine_with_prior=["full"], start=False + ) + restored.start() + restored_count = restored.safe_psql("SELECT count(*) FROM t;") + assert restored_count == str( + rows_after_truncation + ), "Restored backup has correct row count" + primary.stop() + restored.stop() diff --git a/src/bin/pg_rewind/meson.build b/src/bin/pg_rewind/meson.build index 52a6ab0a51503..fe20aa26553ea 100644 --- a/src/bin/pg_rewind/meson.build +++ b/src/bin/pg_rewind/meson.build @@ -32,6 +32,21 @@ tests += { 'name': 'pg_rewind', 'sd': meson.current_source_dir(), 'bd': meson.current_build_dir(), + 'pytest': { + 'tests': [ + 'pyt/test_001_basic.py', + 'pyt/test_002_databases.py', + 'pyt/test_003_extrafiles.py', + 'pyt/test_004_pg_xlog_symlink.py', + 'pyt/test_005_same_timeline.py', + 'pyt/test_006_options.py', + 'pyt/test_007_standby_source.py', + 'pyt/test_008_min_recovery_point.py', + 'pyt/test_009_growing_files.py', + 'pyt/test_010_keep_recycled_wals.py', + 'pyt/test_011_wal_copy.py', + ], + }, 'tap': { 'tests': [ 't/001_basic.pl', diff --git a/src/bin/pg_rewind/pyt/test_001_basic.py b/src/bin/pg_rewind/pyt/test_001_basic.py new file mode 100644 index 0000000000000..ff56b4aaf8109 --- /dev/null +++ b/src/bin/pg_rewind/pyt/test_001_basic.py @@ -0,0 +1,120 @@ +# Copyright (c) 2021-2026, PostgreSQL Global Development Group + +"""Port of src/bin/pg_rewind/t/001_basic.pl. + +End-to-end pg_rewind: after the standby is promoted and the old primary +diverges (inserts, truncation, copy-tail, dropped table, in-place tablespace), +pg_rewind brings the old primary back in line with the promoted standby. The +'local' mode additionally checks pg_rewind's refusal to run against a running +target/source and its --dry-run. Exercised for 'local', 'remote', and +'archive' source modes. 
+""" + +import pypg + + +def _setup_diverge(rt, test_mode): + rt.setup_cluster(test_mode) + rt.start_primary() + # In-place tablespace with some data. + rt.primary_psql("CREATE TABLESPACE space_test LOCATION ''") + rt.primary_psql("CREATE TABLE space_tbl (d text) TABLESPACE space_test") + rt.primary_psql("INSERT INTO space_tbl VALUES ('in primary, before promotion')") + rt.primary_psql("CREATE TABLE tbl1 (d text)") + rt.primary_psql("INSERT INTO tbl1 VALUES ('in primary')") + rt.primary_psql("CREATE TABLE trunc_tbl (d text)") + rt.primary_psql("INSERT INTO trunc_tbl VALUES ('in primary')") + rt.primary_psql("CREATE TABLE tail_tbl (id integer, d text)") + rt.primary_psql("INSERT INTO tail_tbl VALUES (0, 'in primary')") + rt.primary_psql("CREATE TABLE drop_tbl (d text)") + rt.primary_psql("INSERT INTO drop_tbl VALUES ('in primary')") + rt.primary_psql("CHECKPOINT") + rt.create_standby(test_mode) + # Data replicated to the standby before promotion. + rt.primary_psql("INSERT INTO tbl1 values ('in primary, before promotion')") + rt.primary_psql("INSERT INTO trunc_tbl values ('in primary, before promotion')") + rt.primary_psql( + "INSERT INTO tail_tbl SELECT g, 'in primary, before promotion: ' || g " + "FROM generate_series(1, 10000) g" + ) + rt.primary_psql("CHECKPOINT") + rt.promote_standby() + # Diverge the old primary from the promoted standby. 
+ rt.primary_psql("INSERT INTO tbl1 VALUES ('in primary, after promotion')") + rt.standby_psql("INSERT INTO tbl1 VALUES ('in standby, after promotion')") + rt.primary_psql( + "INSERT INTO trunc_tbl SELECT 'in primary, after promotion: ' || g " + "FROM generate_series(1, 10000) g" + ) + rt.primary_psql("DELETE FROM tail_tbl WHERE id > 10") + rt.primary_psql("VACUUM tail_tbl") + rt.primary_psql("insert into drop_tbl values ('in primary, after promotion')") + rt.primary_psql("DROP TABLE drop_tbl") + rt.primary_psql("INSERT INTO space_tbl VALUES ('in primary, after promotion')") + rt.standby_psql("INSERT INTO space_tbl VALUES ('in standby, after promotion')") + + +def _local_negative_checks(rt, pg_bin): + primary_pgdata = str(rt.primary.datadir) + standby_pgdata = str(rt.standby.datadir) + base = [ + "pg_rewind", + "--debug", + "--source-pgdata", + standby_pgdata, + "--target-pgdata", + primary_pgdata, + "--no-sync", + ] + pg_bin.command_fails(base, "pg_rewind with running target") + pg_bin.command_fails( + base + ["--no-ensure-shutdown"], + "pg_rewind --no-ensure-shutdown with running target", + ) + rt.primary.stop() + pg_bin.command_fails( + base + ["--no-ensure-shutdown"], "pg_rewind with unexpected running source" + ) + rt.standby.stop() + pg_bin.command_ok(base + ["--dry-run"], "pg_rewind --dry-run") + rt.standby.start() + rt.primary.start() + + +def _check_results(rt): + rt.check_query( + "SELECT * FROM space_tbl ORDER BY d", + "in primary, before promotion\nin standby, after promotion", + "table content", + ) + rt.check_query( + "SELECT * FROM tbl1", + "in primary\nin primary, before promotion\nin standby, after promotion", + "table content", + ) + rt.check_query( + "SELECT * FROM trunc_tbl", + "in primary\nin primary, before promotion", + "truncation", + ) + rt.check_query("SELECT count(*) FROM tail_tbl", "10001", "tail-copy") + rt.check_query("SELECT * FROM drop_tbl", "in primary", "drop") + assert pypg.check_mode_recursive( + rt.primary.datadir, 0o700, 0o600 
+ ), "check PGDATA permissions" + + +def _run_test(rt, pg_bin, test_mode): + _setup_diverge(rt, test_mode) + if test_mode == "local": + _local_negative_checks(rt, pg_bin) + rt.run_pg_rewind(test_mode) + _check_results(rt) + rt.clean_rewind_test() + + +def test_001_basic(rewind_test, pg_bin): + """Full pg_rewind divergence reconciliation (local, remote, archive).""" + _run_test(rewind_test, pg_bin, "local") + _run_test(rewind_test, pg_bin, "remote") + _run_test(rewind_test, pg_bin, "archive") diff --git a/src/bin/pg_rewind/pyt/test_002_databases.py b/src/bin/pg_rewind/pyt/test_002_databases.py new file mode 100644 index 0000000000000..bf0eee9f39167 --- /dev/null +++ b/src/bin/pg_rewind/pyt/test_002_databases.py @@ -0,0 +1,52 @@ +# Copyright (c) 2021-2026, PostgreSQL Global Development Group + +"""Port of src/bin/pg_rewind/t/002_databases.pl. + +After the primary and standby diverge, pg_rewind brings the old primary back in +line with the promoted standby: databases created on the old primary after +promotion disappear, those created on the standby appear, and PGDATA retains +group permissions (initdb -g). Exercised for the 'local' and 'remote' source +modes. +""" + +import pypg + + +def _run_test(rt, test_mode): + rt.setup_cluster(test_mode, ["-g"]) + rt.start_primary() + rt.primary_psql("CREATE DATABASE inprimary") + rt.primary_psql("CREATE TABLE inprimary_tab (a int)", dbname="inprimary") + rt.create_standby(test_mode) + rt.primary_psql("CREATE DATABASE beforepromotion") + rt.primary_psql( + "CREATE TABLE beforepromotion_tab (a int)", dbname="beforepromotion" + ) + rt.promote_standby() + rt.primary_psql("CREATE DATABASE primary_afterpromotion") + rt.primary_psql( + "CREATE TABLE primary_promotion_tab (a int)", dbname="primary_afterpromotion" + ) + rt.standby_psql("CREATE DATABASE standby_afterpromotion") + rt.standby_psql( + "CREATE TABLE standby_promotion_tab (a int)", dbname="standby_afterpromotion" + ) + # The clusters are now diverged. 
+ rt.run_pg_rewind(test_mode) + rt.check_query( + "SELECT datname FROM pg_database ORDER BY 1", + "beforepromotion\ninprimary\npostgres\n" + "standby_afterpromotion\ntemplate0\ntemplate1", + "database names", + ) + # PGDATA should retain group permissions (initdb -g). + assert pypg.check_mode_recursive( + rt.primary.datadir, 0o750, 0o640 + ), "check PGDATA permissions" + rt.clean_rewind_test() + + +def test_002_databases(rewind_test): + """pg_rewind reconciles per-database divergence (local and remote modes).""" + _run_test(rewind_test, "local") + _run_test(rewind_test, "remote") diff --git a/src/bin/pg_rewind/pyt/test_003_extrafiles.py b/src/bin/pg_rewind/pyt/test_003_extrafiles.py new file mode 100644 index 0000000000000..3f8da29f7bd7b --- /dev/null +++ b/src/bin/pg_rewind/pyt/test_003_extrafiles.py @@ -0,0 +1,88 @@ +# Copyright (c) 2021-2026, PostgreSQL Global Development Group + +"""Port of src/bin/pg_rewind/t/003_extrafiles.pl. + +pg_rewind reconciles extra files/directories: files present only on the old +primary (created after promotion) are removed, files present on the standby are +copied in, and files present in both are kept. Exercised for the 'local' and +'remote' source modes. +""" + +import os + + +def _make_tree(base, spec): + for relpath, content in spec: + full = os.path.join(base, relpath) + os.makedirs(os.path.dirname(full), exist_ok=True) + with open(full, "w", encoding="utf-8") as fh: + fh.write(content) + + +def _run_test(rt, test_mode): + rt.setup_cluster(test_mode) + rt.start_primary() + primary_dir = str(rt.primary.datadir) + # Files present in both primary and standby (created before the backup). + _make_tree( + primary_dir, + [ + ("tst_both_dir/both_file1", "in both1"), + ("tst_both_dir/both_file2", "in both2"), + ("tst_both_dir/both_subdir/both_file3", "in both3"), + ], + ) + rt.create_standby(test_mode) + standby_dir = str(rt.standby.datadir) + # Files only on the standby (after the backup). 
+ _make_tree( + standby_dir, + [ + ("tst_standby_dir/standby_file1", "in standby1"), + ("tst_standby_dir/standby_file2", "in standby2"), + ("tst_standby_dir/standby_file3 with 'quotes'", "in standby3"), + ("tst_standby_dir/standby_subdir/standby_file4", "in standby4"), + ], + ) + # Files only on the primary (after promotion); pg_rewind should remove them. + _make_tree( + primary_dir, + [ + ("tst_primary_dir/primary_file1", "in primary1"), + ("tst_primary_dir/primary_file2", "in primary2"), + ("tst_primary_dir/primary_subdir/primary_file3", "in primary3"), + ], + ) + rt.promote_standby() + rt.run_pg_rewind(test_mode) + # Every tst_* path remaining under the primary's data dir. + paths = [] + for dirpath, dirs, files in os.walk(primary_dir): + for name in dirs + files: + full = os.path.join(dirpath, name) + if "tst_" in full: + paths.append(full) + expected = [ + primary_dir + suffix + for suffix in [ + "/tst_both_dir", + "/tst_both_dir/both_file1", + "/tst_both_dir/both_file2", + "/tst_both_dir/both_subdir", + "/tst_both_dir/both_subdir/both_file3", + "/tst_standby_dir", + "/tst_standby_dir/standby_file1", + "/tst_standby_dir/standby_file2", + "/tst_standby_dir/standby_file3 with 'quotes'", + "/tst_standby_dir/standby_subdir", + "/tst_standby_dir/standby_subdir/standby_file4", + ] + ] + assert sorted(paths) == sorted(expected), "file lists match" + rt.clean_rewind_test() + + +def test_003_extrafiles(rewind_test): + """pg_rewind reconciles extra files/dirs (local and remote modes).""" + _run_test(rewind_test, "local") + _run_test(rewind_test, "remote") diff --git a/src/bin/pg_rewind/pyt/test_004_pg_xlog_symlink.py b/src/bin/pg_rewind/pyt/test_004_pg_xlog_symlink.py new file mode 100644 index 0000000000000..8f3a5304788fa --- /dev/null +++ b/src/bin/pg_rewind/pyt/test_004_pg_xlog_symlink.py @@ -0,0 +1,46 @@ +# Copyright (c) 2021-2026, PostgreSQL Global Development Group + +"""Port of src/bin/pg_rewind/t/004_pg_xlog_symlink.pl. 
+ +pg_rewind works when the target's pg_wal is a symlink to an out-of-tree +directory: after divergence and rewind, the rewound primary's table content +matches the promoted standby. Exercised for 'local' and 'remote' source modes. +""" + +import os +import shutil + + +def _run_test(rt, tmp_path, test_mode): + xlogdir = str(tmp_path / ("xlog_primary_" + test_mode)) + if os.path.exists(xlogdir): + shutil.rmtree(xlogdir) + rt.setup_cluster(test_mode) + pg_wal = os.path.join(str(rt.primary.datadir), "pg_wal") + # Turn pg_wal into a symlink to an out-of-tree directory. + shutil.move(pg_wal, xlogdir) + os.symlink(xlogdir, pg_wal) + rt.start_primary() + rt.primary_psql("CREATE TABLE tbl1 (d text)") + rt.primary_psql("INSERT INTO tbl1 VALUES ('in primary')") + rt.primary_psql("CHECKPOINT") + rt.create_standby(test_mode) + rt.primary_psql("INSERT INTO tbl1 values ('in primary, before promotion')") + rt.primary_psql("CHECKPOINT") + rt.promote_standby() + # Diverge the old primary and the promoted standby. + rt.primary_psql("INSERT INTO tbl1 VALUES ('in primary, after promotion')") + rt.standby_psql("INSERT INTO tbl1 VALUES ('in standby, after promotion')") + rt.run_pg_rewind(test_mode) + rt.check_query( + "SELECT * FROM tbl1", + "in primary\nin primary, before promotion\nin standby, after promotion", + "table content", + ) + rt.clean_rewind_test() + + +def test_004_pg_xlog_symlink(rewind_test, tmp_path): + """pg_rewind with pg_wal as an out-of-tree symlink (local and remote).""" + _run_test(rewind_test, tmp_path, "local") + _run_test(rewind_test, tmp_path, "remote") diff --git a/src/bin/pg_rewind/pyt/test_005_same_timeline.py b/src/bin/pg_rewind/pyt/test_005_same_timeline.py new file mode 100644 index 0000000000000..41ec43f9d037d --- /dev/null +++ b/src/bin/pg_rewind/pyt/test_005_same_timeline.py @@ -0,0 +1,17 @@ +# Copyright (c) 2021-2026, PostgreSQL Global Development Group + +"""Port of src/bin/pg_rewind/t/005_same_timeline.pl. 
+ +pg_rewind succeeds (does nothing harmful) when the source standby and the +target primary are still on the same timeline -- i.e. the standby was never +promoted, so there is no divergence to rewind. +""" + + +def test_005_same_timeline(rewind_test): + """pg_rewind with source and target on the same timeline.""" + rewind_test.setup_cluster() + rewind_test.start_primary() + rewind_test.create_standby() + rewind_test.run_pg_rewind("local") + rewind_test.clean_rewind_test() diff --git a/src/bin/pg_rewind/pyt/test_006_options.py b/src/bin/pg_rewind/pyt/test_006_options.py new file mode 100644 index 0000000000000..db9230d37a5a0 --- /dev/null +++ b/src/bin/pg_rewind/pyt/test_006_options.py @@ -0,0 +1,57 @@ +# Copyright (c) 2022-2026, PostgreSQL Global Development Group + +# pylint: disable=line-too-long,too-many-statements +"""Port of src/bin/pg_rewind/t/006_options.pl. + +pg_rewind command-line option validation: too many arguments, no source, both remote and local sources, and --write-recovery-conf without a local source each fail. +Generated from the Perl original via .agent/gen_golden.py. 
+""" + + +def test_006_options(pg_bin, tmp_path): + """pg_rewind command-line option validation.""" + pg_bin.program_help_ok("pg_rewind") + pg_bin.program_version_ok("pg_rewind") + pg_bin.program_options_handling_ok("pg_rewind") + primary_pgdata = tmp_path + standby_pgdata = tmp_path + pg_bin.command_fails( + [ + "pg_rewind", + "--debug", + "--target-pgdata", + str(primary_pgdata), + "--source-pgdata", + str(standby_pgdata), + "extra_arg1", + ], + "too many arguments", + ) + pg_bin.command_fails( + ["pg_rewind", "--target-pgdata", str(primary_pgdata)], "no source specified" + ) + pg_bin.command_fails( + [ + "pg_rewind", + "--debug", + "--target-pgdata", + str(primary_pgdata), + "--source-pgdata", + str(standby_pgdata), + "--source-server", + "incorrect_source", + ], + "both remote and local sources specified", + ) + pg_bin.command_fails( + [ + "pg_rewind", + "--debug", + "--target-pgdata", + str(primary_pgdata), + "--source-pgdata", + str(standby_pgdata), + "--write-recovery-conf", + ], + "no local source with --write-recovery-conf", + ) diff --git a/src/bin/pg_rewind/pyt/test_007_standby_source.py b/src/bin/pg_rewind/pyt/test_007_standby_source.py new file mode 100644 index 0000000000000..2808044c7821a --- /dev/null +++ b/src/bin/pg_rewind/pyt/test_007_standby_source.py @@ -0,0 +1,82 @@ +# Copyright (c) 2021-2026, PostgreSQL Global Development Group + +"""Port of src/bin/pg_rewind/t/007_standby_source.pl. + +pg_rewind can use a standby (here node_b, a cascading source) as its source +server. A chain A->B->C is built; C is promoted and diverges from A (which keeps +streaming to B). C is then rewound from B (--source-server --write-recovery-conf), +so C rejoins A's history and continues replaying A's later changes through B. 
+""" + +import os +import shutil +import tempfile + + +def test_007_standby_source(rewind_test, create_pg): + """pg_rewind uses a (cascading) standby as its source server.""" + tmp_folder = tempfile.mkdtemp(prefix="standbysrc_") + rewind_test.setup_cluster("a") + rewind_test.start_primary() + node_a = rewind_test.primary + node_a.safe_psql("CREATE TABLE tbl1 (d text)") + node_a.safe_psql("INSERT INTO tbl1 VALUES ('in A')") + rewind_test.primary_psql("CHECKPOINT") + node_a.backup("my_backup") + node_b = create_pg( + "node_b", from_backup=(node_a, "my_backup"), has_streaming=True, start=False + ) + node_b.set_standby_mode() + node_b.start() + node_b.backup("my_backup") + node_c = create_pg( + "node_c", from_backup=(node_b, "my_backup"), has_streaming=True, start=False + ) + node_c.set_standby_mode() + node_c.start() + node_a.safe_psql("INSERT INTO tbl1 values ('in A, before promotion')") + node_a.safe_psql("CHECKPOINT") + lsn = node_a.lsn("write") + node_a.wait_for_catchup("node_b", "write", lsn) + node_b.wait_for_catchup("node_c", "write", lsn) + node_c.promote() + node_a.safe_psql("INSERT INTO tbl1 VALUES ('in A, after C was promoted')") + node_a.wait_for_catchup("node_b") + node_c.safe_psql("INSERT INTO tbl1 VALUES ('in C, after C was promoted')") + node_c_pgdata = str(node_c.datadir) + node_c.stop("fast") + saved_conf = os.path.join(tmp_folder, "node_c-postgresql.conf.tmp") + shutil.copy(os.path.join(node_c_pgdata, "postgresql.conf"), saved_conf) + node_c.bin.command_ok( + [ + "pg_rewind", + "--debug", + "--source-server", + node_b.connstr("postgres"), + "--target-pgdata", + node_c_pgdata, + "--no-sync", + "--write-recovery-conf", + ], + "pg_rewind remote", + extra_env={"PGAPPNAME": ""}, + ) + shutil.move(saved_conf, os.path.join(node_c_pgdata, "postgresql.conf")) + node_c.start() + rewind_test.primary = node_c + rewind_test.check_query( + "SELECT * FROM tbl1", + "in A\nin A, before promotion\nin A, after C was promoted", + "table content after rewind", + ) + 
node_a.safe_psql("INSERT INTO tbl1 values ('in A, after rewind')") + node_b.wait_for_replay_catchup("node_c", node_a) + rewind_test.check_query( + "SELECT * FROM tbl1", + "in A\nin A, before promotion\nin A, after C was promoted\n" + "in A, after rewind", + "table content after rewind and insert", + ) + node_a.teardown_node() + node_b.teardown_node() + node_c.teardown_node() diff --git a/src/bin/pg_rewind/pyt/test_008_min_recovery_point.py b/src/bin/pg_rewind/pyt/test_008_min_recovery_point.py new file mode 100644 index 0000000000000..f3f2abace5b3d --- /dev/null +++ b/src/bin/pg_rewind/pyt/test_008_min_recovery_point.py @@ -0,0 +1,74 @@ +# Copyright (c) 2021-2026, PostgreSQL Global Development Group + +"""Port of src/bin/pg_rewind/t/008_min_recovery_point.pl. + +A three-node chain (node_1, node_2, node_3 all from one backup) is reconfigured +across two promotions so that node_2 diverges from the latest primary (node_1). +pg_rewind --source-server rewinds node_2 onto node_1; afterward node_2 has +node_1's history (table foo rows kept) and the divergent change on node_3 (the +extra bar row) is gone. 
+""" + +import os +import shutil +import tempfile + + +def test_008_min_recovery_point(create_pg): + """pg_rewind rewinds a diverged standby across promotions via --source-server.""" + tmp_folder = tempfile.mkdtemp(prefix="minrp_") + node1 = create_pg("node_1", allows_streaming=True, start=False) + node1.append_conf("\nwal_keep_size='100 MB'\n") + node1.start() + node1.safe_psql("CREATE TABLE public.foo (t TEXT)") + node1.safe_psql("CREATE TABLE public.bar (t TEXT)") + node1.safe_psql("INSERT INTO public.bar VALUES ('in both')") + node1.backup("my_backup") + node2 = create_pg( + "node_2", from_backup=(node1, "my_backup"), has_streaming=True, start=False + ) + node2.start() + node3 = create_pg( + "node_3", from_backup=(node1, "my_backup"), has_streaming=True, start=False + ) + node3.start() + node1.wait_for_catchup("node_3") + node1.stop("fast") + node3.promote() + node3_connstr = node3.connstr() + node1.append_conf("\nprimary_conninfo='{}'\n".format(node3_connstr)) + node1.set_standby_mode() + node1.start() + node2.append_conf("\nprimary_conninfo='{}'\n".format(node3_connstr)) + node2.restart() + node3.wait_for_catchup("node_1") + node1.promote() + node1.safe_psql("INSERT INTO public.foo (t) VALUES ('keep this')") + node3.safe_psql("INSERT INTO public.bar (t) VALUES ('rewind this')") + node1.safe_psql("INSERT INTO public.foo (t) VALUES ('and this')") + node1.safe_psql("INSERT INTO public.foo (t) VALUES ('and this too')") + assert node2.poll_query_until("SELECT COUNT(*) > 1 FROM public.bar", "t") + node2.stop("fast") + node3.stop("fast") + node2_pgdata = str(node2.datadir) + saved_conf = os.path.join(tmp_folder, "node_2-postgresql.conf.tmp") + shutil.copy(os.path.join(node2_pgdata, "postgresql.conf"), saved_conf) + node2.command_ok( + [ + "pg_rewind", + "--source-server", + node1.connstr(), + "--target-pgdata", + node2_pgdata, + "--debug", + ], + "run pg_rewind", + ) + shutil.move(saved_conf, os.path.join(node2_pgdata, "postgresql.conf")) + node2.start() + assert 
node2.safe_psql("SELECT * FROM public.foo") == ( + "keep this\nand this\nand this too" + ), "table foo after rewind" + assert ( + node2.safe_psql("SELECT * FROM public.bar") == "in both" + ), "table bar after rewind" diff --git a/src/bin/pg_rewind/pyt/test_009_growing_files.py b/src/bin/pg_rewind/pyt/test_009_growing_files.py new file mode 100644 index 0000000000000..672642bc44a0b --- /dev/null +++ b/src/bin/pg_rewind/pyt/test_009_growing_files.py @@ -0,0 +1,60 @@ +# Copyright (c) 2021-2026, PostgreSQL Global Development Group + +"""Port of src/bin/pg_rewind/t/009_growing_files.pl. + +pg_rewind must error out if a source file grows while it is being copied. The +source file's own size is made to change mid-copy by redirecting pg_rewind's +stderr (--debug) into that very file, so the file the rewind is copying keeps +growing; pg_rewind detects the mismatch and fails with "size of source file". +""" + +import os + + +def test_009_growing_files(rewind_test, pg_bin): + """pg_rewind errors when a source file grows during the copy.""" + rewind_test.setup_cluster("local") + rewind_test.start_primary() + rewind_test.primary_psql("CREATE TABLE tbl1 (d text)") + rewind_test.primary_psql("INSERT INTO tbl1 VALUES ('in primary')") + rewind_test.primary_psql("CHECKPOINT") + rewind_test.create_standby("local") + rewind_test.primary_psql("INSERT INTO tbl1 values ('in primary, before promotion')") + rewind_test.primary_psql("CHECKPOINT") + rewind_test.promote_standby() + rewind_test.primary_psql("INSERT INTO tbl1 VALUES ('in primary, after promotion')") + rewind_test.standby_psql("INSERT INTO tbl1 VALUES ('in standby, after promotion')") + primary = rewind_test.primary + standby = rewind_test.standby + standby.stop() + primary.stop() + primary_pgdata = str(primary.datadir) + standby_pgdata = str(standby.datadir) + both_dir = os.path.join(standby_pgdata, "tst_both_dir") + os.mkdir(both_dir) + file1 = os.path.join(both_dir, "file1") + with open(file1, "w", encoding="utf-8") as fh: + 
fh.write("a") + rc = pg_bin.run_redirect_stderr( + [ + "pg_rewind", + "--debug", + "--source-pgdata", + standby_pgdata, + "--target-pgdata", + primary_pgdata, + "--no-sync", + ], + file1, + ) + assert rc != 0, "Error out on copying growing file" + primary_size = os.path.getsize( + os.path.join(primary_pgdata, "tst_both_dir", "file1") + ) + standby_size = os.path.getsize(file1) + assert standby_size != primary_size, "File sizes should differ" + last = "" + with open(file1, encoding="utf-8", errors="replace") as fh: + for line in fh: + last = line + assert "error: size of source file" in last, "Check error message" diff --git a/src/bin/pg_rewind/pyt/test_010_keep_recycled_wals.py b/src/bin/pg_rewind/pyt/test_010_keep_recycled_wals.py new file mode 100644 index 0000000000000..60f2d44ab74e2 --- /dev/null +++ b/src/bin/pg_rewind/pyt/test_010_keep_recycled_wals.py @@ -0,0 +1,48 @@ +# Copyright (c) 2021-2026, PostgreSQL Global Development Group + +"""Port of src/bin/pg_rewind/t/010_keep_recycled_wals.pl. + +pg_rewind must not delete WAL segments that are still required for recovery on +the target. With archiving wedged (a failing archive_command) so segments are +retained, a rewind of the diverged old primary from the promoted standby reports +"Not removing file ... because it is required for recovery" for the kept files. 
+""" + +import re + + +def test_010_keep_recycled_wals(rewind_test, pg_bin): + """pg_rewind keeps WAL segments still required for recovery on the target.""" + rewind_test.setup_cluster() + primary = rewind_test.primary + primary.enable_archiving() + rewind_test.start_primary() + rewind_test.create_standby() + standby = rewind_test.standby + standby.enable_restoring(primary, standby=False) + standby.reload() + rewind_test.primary_psql("CHECKPOINT") # last common checkpoint + primary.append_conf("\narchive_command = 'false'\n") + primary.reload() + rewind_test.primary_psql("CREATE TABLE t(a int)") + rewind_test.primary_psql("INSERT INTO t VALUES(0)") + rewind_test.primary_psql("SELECT pg_switch_wal()") + rewind_test.promote_standby() + rewind_test.standby_psql("INSERT INTO t values(0)") + rewind_test.standby_psql("SELECT pg_switch_wal()") + standby.stop() + primary.stop() + result = pg_bin.run_command( + [ + "pg_rewind", + "--debug", + "--source-pgdata", + str(standby.datadir), + "--target-pgdata", + str(primary.datadir), + "--no-sync", + ] + ) + assert re.search( + r"Not removing file .* because it is required for recovery", result.stderr + ), "some WAL files were skipped" diff --git a/src/bin/pg_rewind/pyt/test_011_wal_copy.py b/src/bin/pg_rewind/pyt/test_011_wal_copy.py new file mode 100644 index 0000000000000..2c96bc60161b1 --- /dev/null +++ b/src/bin/pg_rewind/pyt/test_011_wal_copy.py @@ -0,0 +1,84 @@ +# Copyright (c) 2021-2026, PostgreSQL Global Development Group + +"""Port of src/bin/pg_rewind/t/011_wal_copy.pl. + +pg_rewind copies the WAL segments it needs from the source: a segment already +present and identical on the target is skipped (NONE), a segment that differs +(here deliberately corrupted on the target) is copied (COPY), and a +new-timeline segment absent on the target is copied (COPY). After the rewind the +corrupted target segment matches the source size. 
+""" + +import os + + +def test_011_wal_copy(rewind_test): + """pg_rewind reports NONE/COPY per WAL segment and copies the needed ones.""" + rewind_test.setup_cluster() + rewind_test.start_primary() + rewind_test.create_standby() + primary = rewind_test.primary + standby = rewind_test.standby + rewind_test.primary_psql("CREATE TABLE t(a int)") + rewind_test.primary_psql("INSERT INTO t VALUES(0)") + wal_seg_skipped = primary.safe_psql("SELECT pg_walfile_name(pg_current_wal_lsn())") + rewind_test.primary_psql("SELECT pg_switch_wal()") + rewind_test.primary_psql("INSERT INTO t VALUES(0)") + corrupt_wal_seg = primary.safe_psql("SELECT pg_walfile_name(pg_current_wal_lsn())") + rewind_test.primary_psql("SELECT pg_switch_wal()") + rewind_test.primary_psql("CHECKPOINT") + rewind_test.promote_standby() + new_tl_seg = standby.safe_psql("SELECT pg_walfile_name(pg_current_wal_lsn())") + corrupt_target = primary.datadir / "pg_wal" / corrupt_wal_seg + with open(corrupt_target, "a", encoding="utf-8") as fh: + fh.write("a") + assert corrupt_target.is_file(), "segment {} exists in target before rewind".format( + corrupt_wal_seg + ) + new_tl_target = primary.datadir / "pg_wal" / new_tl_seg + assert ( + not new_tl_target.exists() + ), "segment {} does not exist in target before rewind".format(new_tl_seg) + size_before = os.path.getsize(corrupt_target) + standby.stop() + primary.stop() + primary.command_checks_all( + [ + "pg_rewind", + "--debug", + "--source-pgdata", + str(standby.datadir), + "--target-pgdata", + str(primary.datadir), + "--no-sync", + ], + 0, + [r""], + [ + r"pg_wal/{} \(NONE\)".format(wal_seg_skipped), + r"pg_wal/{} \(COPY\)".format(corrupt_wal_seg), + r"pg_wal/{} \(COPY\)".format(new_tl_seg), + ], + "run pg_rewind", + ) + assert ( + new_tl_target.is_file() + ), "new timeline segment {} exists in target after rewind".format(new_tl_seg) + corrupt_source = standby.datadir / "pg_wal" / corrupt_wal_seg + assert ( + corrupt_source.is_file() + ), "corrupted {} exists in 
source after rewind".format(corrupt_wal_seg) + assert ( + corrupt_target.is_file() + ), "corrupted {} exists in target after rewind".format(corrupt_wal_seg) + source_size = os.path.getsize(corrupt_source) + assert ( + size_before != source_size + ), "different size of corrupted {} in source vs target before rewind".format( + corrupt_wal_seg + ) + assert ( + os.path.getsize(corrupt_target) == source_size + ), "same size of corrupted {} in source and target after rewind".format( + corrupt_wal_seg + ) diff --git a/src/bin/pg_verifybackup/meson.build b/src/bin/pg_verifybackup/meson.build index 0b21db9f1b53c..cb2a453a62383 100644 --- a/src/bin/pg_verifybackup/meson.build +++ b/src/bin/pg_verifybackup/meson.build @@ -22,6 +22,20 @@ tests += { 'name': 'pg_verifybackup', 'sd': meson.current_source_dir(), 'bd': meson.current_build_dir(), + 'pytest': { + 'tests': [ + 'pyt/test_001_basic.py', + 'pyt/test_002_algorithm.py', + 'pyt/test_004_options.py', + 'pyt/test_003_corruption.py', + 'pyt/test_005_bad_manifest.py', + 'pyt/test_006_encoding.py', + 'pyt/test_007_wal.py', + 'pyt/test_008_untar.py', + 'pyt/test_009_extract.py', + 'pyt/test_010_client_untar.py', + ], + }, 'tap': { 'env': {'GZIP_PROGRAM': gzip.found() ? gzip.full_path() : '', 'TAR': tar.found() ? tar.full_path() : '', diff --git a/src/bin/pg_verifybackup/pyt/test_001_basic.py b/src/bin/pg_verifybackup/pyt/test_001_basic.py new file mode 100644 index 0000000000000..7884e7c5fa60e --- /dev/null +++ b/src/bin/pg_verifybackup/pyt/test_001_basic.py @@ -0,0 +1,43 @@ +# Copyright (c) 2022-2026, PostgreSQL Global Development Group + +# pylint: disable=line-too-long,too-many-statements +"""Port of src/bin/pg_verifybackup/t/001_basic.pl. + +pg_verifybackup argument validation: missing/invalid target directory, missing +backup_manifest, too many arguments, and the --manifest-path option pointing at +a nonexistent manifest. Generated from the Perl original via .agent/gen_golden.py. 
+""" + + +def test_001_basic(pg_bin, tmp_path): + """pg_verifybackup argument and manifest-path validation.""" + tempdir = tmp_path + pg_bin.program_help_ok("pg_verifybackup") + pg_bin.program_version_ok("pg_verifybackup") + pg_bin.program_options_handling_ok("pg_verifybackup") + pg_bin.command_fails_like( + ["pg_verifybackup"], + r"""no backup directory specified""", + "target directory must be specified", + ) + pg_bin.command_fails_like( + ["pg_verifybackup", str(tempdir)], + r'''could not open file.*\/backup_manifest\"''', + "pg_verifybackup requires a manifest", + ) + pg_bin.command_fails_like( + ["pg_verifybackup", str(tempdir), str(tempdir)], + r"""too many command-line arguments""", + "multiple target directories not allowed", + ) + (tmp_path / "backup_manifest").write_text("", encoding="utf-8") + pg_bin.command_fails_like( + [ + "pg_verifybackup", + "--manifest-path", + str(tempdir) + "/not_the_manifest", + str(tempdir), + ], + r'''could not open file.*\/not_the_manifest\"''', + "pg_verifybackup respects -m flag", + ) diff --git a/src/bin/pg_verifybackup/pyt/test_002_algorithm.py b/src/bin/pg_verifybackup/pyt/test_002_algorithm.py new file mode 100644 index 0000000000000..c170629519c60 --- /dev/null +++ b/src/bin/pg_verifybackup/pyt/test_002_algorithm.py @@ -0,0 +1,69 @@ +# Copyright (c) 2020-2026, PostgreSQL Global Development Group + +"""Port of src/bin/pg_verifybackup/t/002_algorithm.pl. + +pg_basebackup honors --manifest-checksums for every supported algorithm in both +plain and tar formats: a bogus algorithm is rejected, a valid one is recorded +throughout the manifest (or just produces a manifest for 'none'), and the +resulting backup verifies. 
+""" + +import os +import shutil + +import pypg + + +def _test_checksums(primary, fmt, algorithm): + backup_path = "{}/{}/{}".format(primary.backup_dir, fmt, algorithm) + backup = [ + "pg_basebackup", + "--pgdata", + backup_path, + "--manifest-checksums", + algorithm, + "--no-sync", + "--checkpoint", + "fast", + ] + verify = ["pg_verifybackup", "--exit-on-error", backup_path] + if fmt == "tar": + backup += ["--format", "tar"] + if algorithm == "bogus": + primary.command_fails( + backup, '{} format backup fails with algorithm "{}"'.format(fmt, algorithm) + ) + return + primary.command_ok( + backup, '{} format backup ok with algorithm "{}"'.format(fmt, algorithm) + ) + if algorithm == "none": + assert os.path.isfile( + "{}/backup_manifest".format(backup_path) + ), "{} format backup manifest exists".format(fmt) + else: + manifest = pypg.slurp_file("{}/backup_manifest".format(backup_path)) + count = manifest.lower().count(algorithm.lower()) + assert count > 100, "{} is mentioned many times in the manifest".format( + algorithm + ) + primary.command_ok( + verify, 'verify {} format backup with algorithm "{}"'.format(fmt, algorithm) + ) + shutil.rmtree(backup_path) + + +def test_002_algorithm(create_pg): + """pg_basebackup --manifest-checksums across formats and algorithms.""" + primary = create_pg("primary", allows_streaming=True) + for fmt in ("plain", "tar"): + for algorithm in ( + "bogus", + "none", + "crc32c", + "sha224", + "sha256", + "sha384", + "sha512", + ): + _test_checksums(primary, fmt, algorithm) diff --git a/src/bin/pg_verifybackup/pyt/test_003_corruption.py b/src/bin/pg_verifybackup/pyt/test_003_corruption.py new file mode 100644 index 0000000000000..96f6ef8db8854 --- /dev/null +++ b/src/bin/pg_verifybackup/pyt/test_003_corruption.py @@ -0,0 +1,304 @@ +# Copyright (c) 2020-2026, PostgreSQL Global Development Group + +# pylint: disable=line-too-long +"""Port of src/bin/pg_verifybackup/t/003_corruption.pl. 
+ +pg_verifybackup detects each way a base backup can be corrupted: extra files +(in the data dir and in a tablespace), missing files/tablespaces, appended or +truncated files, a replaced file (checksum mismatch), a wrong system identifier, +a bad manifest checksum, and unreadable files/directories. Each scenario takes a +fresh tablespace-mapped backup, verifies it intact, mutilates it, and checks +pg_verifybackup fails with the matching message -- for both directory-format and +(where applicable) tar-format backups. +""" + +import os +import shutil +import subprocess +import tempfile + +import pypg + + +def _create_extra_file(backup_path, relative_path): + with open(os.path.join(backup_path, relative_path), "w", encoding="utf-8") as fh: + fh.write("This is an extra file.\n") + + +def _only_entry(path): + return [e for e in pypg.slurp_dir(path) if e not in (".", "..")][0] + + +def _mutilate_extra_file(backup_path): + _create_extra_file(backup_path, "extra_file") + + +def _mutilate_extra_tablespace_file(backup_path): + tsoid = _only_entry(os.path.join(backup_path, "pg_tblspc")) + catvdir = _only_entry(os.path.join(backup_path, "pg_tblspc", tsoid)) + tsdboid = _only_entry(os.path.join(backup_path, "pg_tblspc", tsoid, catvdir)) + _create_extra_file( + backup_path, "pg_tblspc/{}/{}/{}/extra_ts_file".format(tsoid, catvdir, tsdboid) + ) + + +def _mutilate_missing_file(backup_path): + os.unlink(os.path.join(backup_path, "pg_xact", "0000")) + + +def _mutilate_missing_tablespace(backup_path): + tsoid = _only_entry(os.path.join(backup_path, "pg_tblspc")) + os.unlink(os.path.join(backup_path, "pg_tblspc", tsoid)) + + +def _mutilate_append_to_file(backup_path): + pypg.append_to_file(os.path.join(backup_path, "global", "pg_control"), "x") + + +def _mutilate_truncate_file(backup_path): + with open(os.path.join(backup_path, "pg_hba.conf"), "w", encoding="utf-8"): + pass + + +def _mutilate_replace_file(backup_path): + pathname = os.path.join(backup_path, "PG_VERSION") + contents = 
pypg.slurp_file(pathname) + with open(pathname, "w", encoding="utf-8") as fh: + fh.write("q" * len(contents)) + + +def _mutilate_bad_manifest(backup_path): + pypg.append_to_file(os.path.join(backup_path, "backup_manifest"), "\n") + + +def _mutilate_open_file_fails(backup_path): + os.chmod(os.path.join(backup_path, "PG_VERSION"), 0) + + +def _mutilate_open_directory_fails(backup_path): + os.chmod(os.path.join(backup_path, "pg_subtrans"), 0) + + +def _cleanup_open_directory_fails(backup_path): + os.chmod(os.path.join(backup_path, "pg_subtrans"), 0o700) + + +def _mutilate_search_directory_fails(backup_path): + os.chmod(os.path.join(backup_path, "base"), 0o400) + + +def _cleanup_search_directory_fails(backup_path): + os.chmod(os.path.join(backup_path, "base"), 0o700) + + +def _make_system_identifier_mutilator(create_pg): + def mutilate(backup_path): + node = create_pg("node", force_initdb=True, allows_streaming=True) + node.backup("backup2") + shutil.move( + os.path.join(str(node.backup_dir), "backup2", "backup_manifest"), + os.path.join(backup_path, "backup_manifest"), + ) + node.teardown_node(fail_ok=True) + + return mutilate + + +def _scenarios(create_pg): + return [ + ( + "extra_file", + _mutilate_extra_file, + None, + r'extra_file.*present (on disk|in archive "[^"]+") but not in the manifest', + False, + ), + ( + "extra_tablespace_file", + _mutilate_extra_tablespace_file, + None, + r'extra_ts_file.*present (on disk|in archive "[^"]+") but not in the manifest', + False, + ), + ( + "missing_file", + _mutilate_missing_file, + None, + r'pg_xact/0000.*present in the manifest but not (on disk|in archive "[^"]+")', + False, + ), + ( + "missing_tablespace", + _mutilate_missing_tablespace, + None, + r'pg_tblspc.*present in the manifest but not (on disk|in archive "[^"]+")', + False, + ), + ( + "append_to_file", + _mutilate_append_to_file, + None, + r'has size \d+ (on disk|in archive "[^"]+") but size \d+ in the manifest', + False, + ), + ( + "truncate_file", + 
_mutilate_truncate_file, + None, + r'has size 0 (on disk|in archive "[^"]+") but size \d+ in the manifest', + False, + ), + ( + "replace_file", + _mutilate_replace_file, + None, + r"checksum mismatch for file", + False, + ), + ( + "system_identifier", + _make_system_identifier_mutilator(create_pg), + None, + r"manifest system identifier is .*, but control file has", + False, + ), + ( + "bad_manifest", + _mutilate_bad_manifest, + None, + r"manifest checksum mismatch", + False, + ), + ( + "open_file_fails", + _mutilate_open_file_fails, + None, + r"could not open file", + True, + ), + ( + "open_directory_fails", + _mutilate_open_directory_fails, + _cleanup_open_directory_fails, + r"could not open directory", + True, + ), + ( + "search_directory_fails", + _mutilate_search_directory_fails, + _cleanup_search_directory_fails, + r"could not stat file or directory", + True, + ), + ] + + +def _tar_check(primary, name, backup_path, fails_like, tar, tar_flags): + tar_backup_path = os.path.join(str(primary.backup_dir), "tar_" + name) + os.mkdir(tar_backup_path) + tblspc = os.path.join(backup_path, "pg_tblspc") + for tsoid in [e for e in pypg.slurp_dir(tblspc) if e not in (".", "..")]: + tspath = os.path.join(tblspc, tsoid) + subprocess.run( + [ + tar, + *tar_flags, + "-cf", + os.path.join(tar_backup_path, tsoid + ".tar"), + ".", + ], + cwd=tspath, + check=True, + ) + shutil.rmtree(tspath) + subprocess.run( + [tar, *tar_flags, "-cf", os.path.join(tar_backup_path, "pg_wal.tar"), "."], + cwd=os.path.join(backup_path, "pg_wal"), + check=True, + ) + shutil.rmtree(os.path.join(backup_path, "pg_wal")) + shutil.move( + os.path.join(backup_path, "backup_manifest"), + os.path.join(tar_backup_path, "backup_manifest"), + ) + subprocess.run( + [tar, *tar_flags, "-cf", os.path.join(tar_backup_path, "base.tar"), "."], + cwd=backup_path, + check=True, + ) + primary.command_fails_like( + ["pg_verifybackup", tar_backup_path], + fails_like, + "corrupt backup fails verification: " + name, + ) + 
shutil.rmtree(tar_backup_path) + + +def _tar_portability_options(tar): + """Return portability flags for tar (mirrors Utils::tar_portability_options). + + Prefer GNU/BSD ustar with owner/group 0; fall back to OpenBSD '-F ustar'; + otherwise no flags. + """ + if not tar: + return [] + devnull = os.devnull + gnu = subprocess.run( + [tar, "--format=ustar", "--owner=0", "--group=0", "-cf", devnull, devnull], + stderr=subprocess.DEVNULL, + check=False, + ) + if gnu.returncode == 0: + return ["--format=ustar", "--owner=0", "--group=0"] + obsd = subprocess.run( + [tar, "-F", "ustar", "-cf", devnull, devnull], + stderr=subprocess.DEVNULL, + check=False, + ) + if obsd.returncode == 0: + return ["-F", "ustar"] + return [] + + +def test_003_corruption(create_pg): + """pg_verifybackup detects each kind of backup corruption.""" + tar = os.environ.get("TAR") + tar_flags = _tar_portability_options(tar) + primary = create_pg("primary", allows_streaming=True) + source_ts_path = tempfile.mkdtemp(prefix="ts_") + primary.safe_psql( + "CREATE TABLE x1 (a int);\nINSERT INTO x1 VALUES (111);\n" + "CREATE TABLESPACE ts1 LOCATION '{}';\n" + "CREATE TABLE x2 (a int) TABLESPACE ts1;\n" + "INSERT INTO x1 VALUES (222);".format(source_ts_path) + ) + for name, mutilate, cleanup, fails_like, needs_perms in _scenarios(create_pg): + if needs_perms and os.name == "nt": + continue + backup_path = os.path.join(str(primary.backup_dir), name) + backup_ts_path = tempfile.mkdtemp(prefix="ts_") + primary.command_ok( + [ + "pg_basebackup", + "--pgdata", + backup_path, + "--no-sync", + "--checkpoint", + "fast", + "--tablespace-mapping", + "{}={}".format(source_ts_path, backup_ts_path), + ], + "base backup ok", + ) + primary.command_ok(["pg_verifybackup", backup_path], "intact backup verified") + mutilate(backup_path) + primary.command_fails_like( + ["pg_verifybackup", backup_path], + fails_like, + "corrupt backup fails verification: " + name, + ) + if cleanup: + cleanup(backup_path) + if not needs_perms and 
tar: + _tar_check(primary, name, backup_path, fails_like, tar, tar_flags) + shutil.rmtree(backup_path, ignore_errors=True) diff --git a/src/bin/pg_verifybackup/pyt/test_004_options.py b/src/bin/pg_verifybackup/pyt/test_004_options.py new file mode 100644 index 0000000000000..57d553bc1cb7a --- /dev/null +++ b/src/bin/pg_verifybackup/pyt/test_004_options.py @@ -0,0 +1,113 @@ +# Copyright (c) 2020-2026, PostgreSQL Global Development Group + +# pylint: disable=line-too-long +"""Port of src/bin/pg_verifybackup/t/004_options.pl. + +pg_verifybackup option handling: --quiet is silent on success, --format +plain/tar validation, mutually-exclusive --progress/--quiet, checksum-mismatch +detection, --skip-checksums, --ignore (single/multiple/insufficient), +multiple-problem and --exit-on-error reporting, and a nonexistent directory. +""" + +import re +import shutil + +import pypg + + +def test_004_options(create_pg): + """pg_verifybackup option/error handling across many scenarios.""" + primary = create_pg("primary", allows_streaming=True) + backup_path = "{}/test_options".format(primary.backup_dir) + primary.command_ok( + ["pg_basebackup", "--pgdata", backup_path, "--no-sync", "--checkpoint", "fast"], + "base backup ok", + ) + res = primary.bin.result(["pg_verifybackup", "--quiet", backup_path]) + assert res.rc == 0, "--quiet succeeds: exit code 0" + assert res.stdout == "", "--quiet succeeds: no stdout" + assert res.stderr == "", "--quiet succeeds: no stderr" + primary.command_ok( + ["pg_verifybackup", "--format", "plain", backup_path], + "verifies with --format=plain", + ) + primary.command_fails_like( + ["pg_verifybackup", "--format", "y", backup_path], + r'invalid backup format "y", must be "plain" or "tar"', + "does not verify with --format=y", + ) + primary.command_fails_like( + ["pg_verifybackup", "--format", "tar", "--no-parse-wal", backup_path], + r'"pg_multixact" is not a regular file', + "does not verify with --format=tar --no-parse-wal", + ) + 
primary.command_fails_like( + ["pg_verifybackup", "--progress", "--quiet", backup_path], + r"cannot specify both -P/--progress and -q/--quiet", + "cannot use --progress and --quiet at the same time", + ) + version_pathname = "{}/PG_VERSION".format(backup_path) + version_contents = pypg.slurp_file(version_pathname) + with open(version_pathname, "w", encoding="utf-8") as fh: + fh.write("q" * len(version_contents)) + primary.command_fails_like( + ["pg_verifybackup", "--quiet", backup_path], + r'checksum mismatch for file "PG_VERSION"', + "--quiet checksum mismatch", + ) + primary.command_like( + ["pg_verifybackup", "--skip-checksums", backup_path], + r"backup successfully verified", + "--skip-checksums skips checksumming", + ) + primary.command_checks_all( + ["pg_verifybackup", "--progress", "--ignore", "PG_VERSION", backup_path], + 0, + [r"backup successfully verified"], + [r"(\d+/\d+ kB \(\d+%\) verified)+"], + "--ignore ignores problem file", + ) + shutil.rmtree("{}/pg_xact".format(backup_path)) + primary.command_fails_like( + ["pg_verifybackup", "--ignore", "PG_VERSION", backup_path], + r"pg_xact.*is present in the manifest but not on disk", + "--ignore does not ignore all problems", + ) + primary.command_like( + [ + "pg_verifybackup", + "--ignore", + "PG_VERSION", + "--ignore", + "pg_xact", + backup_path, + ], + r"backup successfully verified", + "multiple --ignore options work", + ) + res = primary.bin.result(["pg_verifybackup", backup_path]) + assert res.rc != 0, "multiple problems: fails" + assert re.search( + r"pg_xact.*is present in the manifest but not on disk", res.stderr + ), "multiple problems: missing files reported" + assert re.search( + r'checksum mismatch for file "PG_VERSION"', res.stderr + ), "multiple problems: checksum mismatch reported" + res = primary.bin.result(["pg_verifybackup", "--exit-on-error", backup_path]) + assert res.rc != 0, "--exit-on-error reports 1 error: fails" + assert re.search( + r"pg_xact.*is present in the manifest but not 
on disk", res.stderr + ), "--exit-on-error reports 1 error: missing files reported" + assert not re.search( + r'checksum mismatch for file "PG_VERSION"', res.stderr + ), "--exit-on-error reports 1 error: checksum mismatch not reported" + primary.command_fails_like( + [ + "pg_verifybackup", + "--manifest-path", + "{}/backup_manifest".format(backup_path), + "{}/fake".format(backup_path), + ], + r"could not open directory", + "nonexistent backup directory", + ) diff --git a/src/bin/pg_verifybackup/pyt/test_005_bad_manifest.py b/src/bin/pg_verifybackup/pyt/test_005_bad_manifest.py new file mode 100644 index 0000000000000..d22ee94cc91bf --- /dev/null +++ b/src/bin/pg_verifybackup/pyt/test_005_bad_manifest.py @@ -0,0 +1,175 @@ +# Copyright (c) 2020-2026, PostgreSQL Global Development Group + +# pylint: disable=line-too-long +"""Port of src/bin/pg_verifybackup/t/005_bad_manifest.pl. + +pg_verifybackup rejects malformed backup manifests with a specific diagnostic +for each kind of error: JSON parse errors, missing/invalid required fields in +Files and WAL-Ranges entries, duplicate paths, unrecognized/invalid checksum +algorithms, and a bad manifest checksum. Each manifest is written to a temp dir +and pg_verifybackup is expected to fail with the matching message. +""" + +import re +import tempfile + + +# (kind, description, manifest_contents). kind: 'parse' -> "could not parse +# backup manifest: "; 'fatal' -> "error: "; 'raw' -> desc is the +# full regex. 
+_CASES = [ + ( + "raw", + r"could not parse backup manifest: The input string ended unexpectedly", + "{\n", + ), + ("parse", "unexpected object end", "{}\n"), + ("parse", "unexpected array start", "[]\n"), + ("parse", "expected version indicator", '{"not-expected": 1}\n'), + ( + "parse", + "manifest version not an integer", + '{"PostgreSQL-Backup-Manifest-Version": "phooey"}\n', + ), + ( + "parse", + "unexpected manifest version", + '{"PostgreSQL-Backup-Manifest-Version": 9876599}\n', + ), + ( + "parse", + "unexpected scalar", + '{"PostgreSQL-Backup-Manifest-Version": 1, "Files": true}\n', + ), + ( + "parse", + "unrecognized top-level field", + '{"PostgreSQL-Backup-Manifest-Version": 1, "Oops": 1}\n', + ), + ( + "parse", + "unexpected object start", + '{"PostgreSQL-Backup-Manifest-Version": 1, "Files": {}}\n', + ), + ( + "parse", + "missing path name", + '{"PostgreSQL-Backup-Manifest-Version": 1, "Files": [{}]}\n', + ), + ( + "parse", + "both path name and encoded path name", + '{"PostgreSQL-Backup-Manifest-Version": 1, "Files": [\n {"Path": "x", "Encoded-Path": "1234"}\n]}\n', + ), + ( + "parse", + "unexpected file field", + '{"PostgreSQL-Backup-Manifest-Version": 1, "Files": [\n {"Oops": 1}\n]}\n', + ), + ( + "parse", + "missing size", + '{"PostgreSQL-Backup-Manifest-Version": 1, "Files": [\n {"Path": "x"}\n]}\n', + ), + ( + "parse", + "file size is not an integer", + '{"PostgreSQL-Backup-Manifest-Version": 1, "Files": [\n {"Path": "x", "Size": "Oops"}\n]}\n', + ), + ( + "parse", + "could not decode file name", + '{"PostgreSQL-Backup-Manifest-Version": 1, "Files": [\n {"Encoded-Path": "123", "Size": 0}\n]}\n', + ), + ( + "fatal", + "duplicate path name in backup manifest", + '{"PostgreSQL-Backup-Manifest-Version": 1, "Files": [\n {"Path": "x", "Size": 0},\n {"Path": "x", "Size": 0}\n]}\n', + ), + ( + "parse", + "checksum without algorithm", + '{"PostgreSQL-Backup-Manifest-Version": 1, "Files": [\n {"Path": "x", "Size": 100, "Checksum": "Oops"}\n]}\n', + ), + ( + 
"fatal", + "unrecognized checksum algorithm", + '{"PostgreSQL-Backup-Manifest-Version": 1, "Files": [\n {"Path": "x", "Size": 100, "Checksum-Algorithm": "Oops", "Checksum": "00"}\n]}\n', + ), + ( + "fatal", + "invalid checksum for file", + '{"PostgreSQL-Backup-Manifest-Version": 1, "Files": [\n {"Path": "x", "Size": 100, "Checksum-Algorithm": "CRC32C", "Checksum": "0"}\n]}\n', + ), + ( + "parse", + "missing start LSN", + '{"PostgreSQL-Backup-Manifest-Version": 1, "WAL-Ranges": [\n {"Timeline": 1}\n]}\n', + ), + ( + "parse", + "missing end LSN", + '{"PostgreSQL-Backup-Manifest-Version": 1, "WAL-Ranges": [\n {"Timeline": 1, "Start-LSN": "0/0"}\n]}\n', + ), + ( + "parse", + "unexpected WAL range field", + '{"PostgreSQL-Backup-Manifest-Version": 1, "WAL-Ranges": [\n {"Oops": 1}\n]}\n', + ), + ( + "parse", + "missing timeline", + '{"PostgreSQL-Backup-Manifest-Version": 1, "WAL-Ranges": [\n {}\n]}\n', + ), + ( + "parse", + "unexpected object end", + '{"PostgreSQL-Backup-Manifest-Version": 1, "WAL-Ranges": [\n {"Timeline": 1, "Start-LSN": "0/0", "End-LSN": "0/0"}\n]}\n', + ), + ( + "parse", + "timeline is not an integer", + '{"PostgreSQL-Backup-Manifest-Version": 1, "WAL-Ranges": [\n {"Timeline": true, "Start-LSN": "0/0", "End-LSN": "0/0"}\n]}\n', + ), + ( + "parse", + "could not parse start LSN", + '{"PostgreSQL-Backup-Manifest-Version": 1, "WAL-Ranges": [\n {"Timeline": 1, "Start-LSN": "oops", "End-LSN": "0/0"}\n]}\n', + ), + ( + "parse", + "could not parse end LSN", + '{"PostgreSQL-Backup-Manifest-Version": 1, "WAL-Ranges": [\n {"Timeline": 1, "Start-LSN": "0/0", "End-LSN": "oops"}\n]}\n', + ), + ( + "parse", + "expected at least 2 lines", + '{"PostgreSQL-Backup-Manifest-Version": 1, "Files": [], "Manifest-Checksum": null}\n', + ), + ( + "parse", + "last line not newline-terminated", + '{"PostgreSQL-Backup-Manifest-Version": 1,\n "Files": [],\n "Manifest-Checksum": null}', + ), + ( + "fatal", + "invalid manifest checksum", + '{"PostgreSQL-Backup-Manifest-Version": 1, 
def test_005_bad_manifest(create_pg):
    """pg_verifybackup reports the right diagnostic for each malformed manifest.

    Every ``(kind, desc, contents)`` entry of ``_CASES`` is written to a
    ``backup_manifest`` file in a scratch directory, and ``pg_verifybackup``
    is expected to fail with output matching a pattern derived from ``kind``:

    * ``"parse"`` - ``"could not parse backup manifest: "`` followed by the
      regex-escaped ``desc``;
    * ``"fatal"`` - ``"error: "`` followed by the regex-escaped ``desc``;
    * anything else (``"raw"``) - ``desc`` is used as the regex unchanged.

    The scratch directory is removed when the test finishes, whether it
    passes or fails.
    """
    primary = create_pg("primary")
    # TemporaryDirectory (unlike a bare mkdtemp) deletes the directory on
    # exit from the with-block, so repeated runs do not accumulate
    # "badmf_*" directories in the system temp location.
    with tempfile.TemporaryDirectory(prefix="badmf_") as tempdir:
        manifest_path = "{}/backup_manifest".format(tempdir)
        for kind, desc, contents in _CASES:
            # Each case overwrites the previous manifest in place.
            with open(manifest_path, "w", encoding="utf-8") as fh:
                fh.write(contents)
            if kind == "parse":
                pattern = r"could not parse backup manifest: " + re.escape(desc)
            elif kind == "fatal":
                pattern = r"error: " + re.escape(desc)
            else:
                # "raw" cases already carry a ready-made regular expression.
                pattern = desc
            primary.command_fails_like(["pg_verifybackup", tempdir], pattern, desc)
def test_006_encoding(create_pg):
    """A force-encoded manifest has many Encoded-Path entries and verifies.

    Takes a base backup with ``--manifest-force-encode``, checks that the
    resulting ``backup_manifest`` mentions "Encoded-Path" (case-insensitively)
    more than 100 times, then runs ``pg_verifybackup --skip-checksums`` on it.
    """
    node = create_pg("primary", allows_streaming=True)
    target_dir = "{}/test_encoding".format(node.backup_dir)

    basebackup_cmd = ["pg_basebackup", "--pgdata", target_dir]
    basebackup_cmd += ["--no-sync", "--checkpoint", "fast"]
    basebackup_cmd.append("--manifest-force-encode")
    node.command_ok(basebackup_cmd, "backup ok with forced hex encoding")

    manifest_text = pypg.slurp_file("{}/backup_manifest".format(target_dir))
    encoded_hits = re.findall(r"Encoded-Path", manifest_text, re.IGNORECASE)
    assert len(encoded_hits) > 100, "many paths are encoded in the manifest"

    verify_cmd = ["pg_verifybackup", "--skip-checksums", target_dir]
    node.command_like(
        verify_cmd,
        r"backup successfully verified",
        "backup with forced encoding verified",
    )
def test_007_wal(create_pg):
    """pg_verifybackup WAL parsing: missing/relocated/corrupt/timeline/tar.

    Sequence of checks:

    1. Plain backup with its ``pg_wal`` directory renamed away: verification
       fails, but succeeds with ``--no-parse-wal`` or with ``--wal-path``
       pointing at the renamed directory.
    2. ``pg_wal`` restored and one WAL segment overwritten with "w"
       characters of the same length: verification fails.
    3. After stop / ``standby.signal`` / start / promote / WAL switch, a
       second backup verifies ("timeline > 1").
    4. A tar-format backup verifies.
    """
    node = create_pg("primary", allows_streaming=True)

    # -- 1. backup whose WAL directory is temporarily moved elsewhere --
    plain_backup = "{}/test_wal".format(node.backup_dir)
    node.command_ok(
        ["pg_basebackup", "--pgdata", plain_backup, "--no-sync", "--checkpoint", "fast"],
        "base backup ok",
    )
    wal_dir = "{}/pg_wal".format(plain_backup)
    moved_wal_dir = "{}/relocated_pg_wal".format(node.backup_dir)
    os.rename(wal_dir, moved_wal_dir)

    node.command_fails_like(
        ["pg_verifybackup", plain_backup],
        r"WAL parsing failed for timeline 1",
        "missing pg_wal causes failure",
    )
    node.command_ok(
        ["pg_verifybackup", "--no-parse-wal", plain_backup],
        "missing pg_wal OK if not verifying WAL",
    )
    node.command_ok(
        ["pg_verifybackup", "--wal-path", moved_wal_dir, plain_backup],
        "--wal-path can be used to specify WAL directory",
    )
    os.rename(moved_wal_dir, wal_dir)

    # -- 2. clobber the first entry that looks like a WAL segment name
    #       (exactly 24 upper-case hex digits) --
    hex_digits = set("0123456789ABCDEF")
    segments = []
    for entry in pypg.slurp_dir(wal_dir):
        if len(entry) == 24 and set(entry) <= hex_digits:
            segments.append(entry)
    victim = "{}/{}".format(wal_dir, segments[0])
    junk = "w" * os.path.getsize(victim)
    with open(victim, "w", encoding="utf-8") as handle:
        handle.write(junk)
    node.command_fails_like(
        ["pg_verifybackup", plain_backup],
        r"WAL parsing failed for timeline 1",
        "corrupt WAL file causes failure",
    )

    # -- 3. restart with a standby.signal entry, promote, back up again --
    node.stop()
    node.append_conf("", "standby.signal")
    node.start()
    node.promote()
    node.safe_psql("SELECT pg_switch_wal()")

    tli_backup = "{}/test_tli".format(node.backup_dir)
    tli_cmd = ["pg_basebackup", "--pgdata", tli_backup]
    tli_cmd += ["--no-sync", "--checkpoint", "fast"]
    node.command_ok(tli_cmd, "base backup 2 ok")
    node.command_ok(
        ["pg_verifybackup", tli_backup], "valid base backup with timeline > 1"
    )

    # -- 4. tar-format backup --
    tar_backup = "{}/test_tar_wal".format(node.backup_dir)
    tar_cmd = ["pg_basebackup", "--pgdata", tar_backup, "--no-sync"]
    tar_cmd += ["--format", "tar", "--checkpoint", "fast"]
    node.command_ok(tar_cmd, "tar backup with separate pg_wal.tar")
    node.command_ok(
        ["pg_verifybackup", tar_backup],
        "WAL verification succeeds with separate pg_wal.tar",
    )
string_agg(encode(sha256(i::bytea), 'hex'), '') " + "FROM generate_series(1, 10240) s(i);" + ) + with open("{}/junk".format(primary.datadir), "w", encoding="utf-8") as jf: + jf.write(junk_data) + source_ts_path = tempfile.mkdtemp(prefix="ts_") + primary.safe_psql( + "CREATE TABLESPACE regress_ts1 LOCATION '{}';\n" + "CREATE TABLE regress_tbl1(i int) TABLESPACE regress_ts1;\n" + "INSERT INTO regress_tbl1 VALUES(generate_series(1,5));".format(source_ts_path) + ) + tsoid = primary.safe_psql( + "SELECT oid FROM pg_tablespace WHERE spcname = 'regress_ts1'" + ) + backup_path = "{}/server-backup".format(primary.backup_dir) + for method, flags, archives, enabled in _configs(tsoid): + if not enabled: + continue + primary.command_ok( + [ + "pg_basebackup", + "--no-sync", + "--checkpoint", + "fast", + "--target", + "server:{}".format(backup_path), + "--wal-method", + "fetch", + ] + + flags, + "server side backup, compression {}".format(method), + ) + backup_files = ",".join( + sorted(f for f in pypg.slurp_dir(backup_path) if f not in (".", "..")) + ) + expected = ",".join(sorted(["backup_manifest"] + archives)) + assert ( + backup_files == expected + ), "found expected backup files, compression {}".format(method) + primary.command_ok( + ["pg_verifybackup", "--exit-on-error", backup_path], + "verify backup, compression {}".format(method), + ) + shutil.rmtree(backup_path) diff --git a/src/bin/pg_verifybackup/pyt/test_009_extract.py b/src/bin/pg_verifybackup/pyt/test_009_extract.py new file mode 100644 index 0000000000000..88e936ed00c9b --- /dev/null +++ b/src/bin/pg_verifybackup/pyt/test_009_extract.py @@ -0,0 +1,73 @@ +# Copyright (c) 2020-2026, PostgreSQL Global Development Group + +# pylint: disable=line-too-long +"""Port of src/bin/pg_verifybackup/t/009_extract.pl. + +A server-compressed plain-format base backup, taken with each compression +method the build supports (none/gzip/lz4/zstd/parallel-zstd), extracts and +verifies successfully. 
def test_009_extract(create_pg):
    """Server-compressed backups extract and verify for each supported method.

    For every enabled entry of ``_CONFIG`` a plain-format backup is taken with
    that entry's compression flags. A failed backup is tolerated only when the
    entry defines ``possibly_unsupported`` and that pattern matches the
    command's stderr; otherwise the backup must succeed, must pass
    ``pg_verifybackup --exit-on-error``, and is then deleted.
    """
    node = create_pg("primary", allows_streaming=True)
    base_cmd = [
        "pg_basebackup",
        "--pgdata",
        None,  # placeholder, filled in per configuration below
        "--wal-method",
        "fetch",
        "--no-sync",
        "--checkpoint",
        "fast",
        "--format",
        "plain",
    ]
    for cfg in _CONFIG:
        target_dir = "{}/extract_backup".format(node.backup_dir)
        label = cfg["method"]
        if cfg["enabled"]:
            cmd = list(base_cmd)
            cmd[2] = target_dir
            outcome = node.bin.run_command(cmd + cfg["flags"])

            skip_pattern = cfg.get("possibly_unsupported")
            if outcome.rc != 0 and skip_pattern:
                # The build knows the method but cannot honour this option.
                if re.search(skip_pattern, outcome.stderr):
                    continue
            assert outcome.rc == 0, "backup done, compression {}".format(label)

            node.command_ok(
                ["pg_verifybackup", "--exit-on-error", target_dir],
                'backup verified, compression method "{}"'.format(label),
            )
            shutil.rmtree(target_dir)
def test_010_client_untar(create_pg):
    """Client-side tar backups produce expected archives and verify.

    A file named ``junk`` holding the concatenated hex SHA-256 digests of
    1..10240 is written into the data directory first. Then, for each enabled
    configuration from ``_configs()``, a tar-format backup is taken; the
    backup directory must contain exactly ``backup_manifest`` plus the
    expected archive, must pass ``pg_verifybackup --exit-on-error``, and is
    removed afterwards. A failed backup is skipped only when the
    configuration supplies an "unsupported" pattern that matches stderr.
    """
    node = create_pg("primary", allows_streaming=True)

    filler = node.safe_psql(
        "SELECT string_agg(encode(sha256(i::bytea), 'hex'), '') "
        "FROM generate_series(1, 10240) s(i);"
    )
    junk_path = "{}/junk".format(node.datadir)
    with open(junk_path, "w", encoding="utf-8") as junk_file:
        junk_file.write(filler)

    target_dir = "{}/client-backup".format(node.backup_dir)
    for label, extra_args, archive_name, is_enabled, skip_pattern in _configs():
        if not is_enabled:
            continue

        cmd = ["pg_basebackup", "--no-sync", "--pgdata", target_dir]
        cmd += ["--wal-method", "fetch", "--checkpoint", "fast"]
        cmd += ["--format", "tar"]
        outcome = node.bin.run_command(cmd + extra_args)

        if outcome.rc != 0 and skip_pattern:
            # Tolerate only the specific "cannot honour this option" failure.
            if re.search(skip_pattern, outcome.stderr):
                continue
        assert outcome.rc == 0, "client side backup, compression {}".format(label)

        present = [
            entry for entry in pypg.slurp_dir(target_dir) if entry not in (".", "..")
        ]
        present.sort()
        wanted = sorted(["backup_manifest", archive_name])
        assert ",".join(present) == ",".join(
            wanted
        ), "found expected backup files, compression {}".format(label)

        node.command_ok(
            ["pg_verifybackup", "--exit-on-error", target_dir],
            "verify backup, compression {}".format(label),
        )
        shutil.rmtree(target_dir)
src/bin/pg_dump/pyt/test_002_pg_dump.py create mode 100644 src/bin/pg_dump/pyt/test_003_pg_dump_with_server.py create mode 100644 src/bin/pg_dump/pyt/test_004_pg_dump_parallel.py create mode 100644 src/bin/pg_dump/pyt/test_005_pg_dump_filterfile.py create mode 100644 src/bin/pg_dump/pyt/test_006_pg_dump_compress.py create mode 100644 src/bin/pg_dump/pyt/test_007_pg_dumpall.py create mode 100644 src/bin/pg_dump/pyt/test_010_dump_connstr.py create mode 100644 src/bin/pg_upgrade/pyt/test_001_basic.py create mode 100644 src/bin/pg_upgrade/pyt/test_002_pg_upgrade.py create mode 100644 src/bin/pg_upgrade/pyt/test_003_logical_slots.py create mode 100644 src/bin/pg_upgrade/pyt/test_004_subscription.py create mode 100644 src/bin/pg_upgrade/pyt/test_005_char_signedness.py create mode 100644 src/bin/pg_upgrade/pyt/test_006_transfer_modes.py create mode 100644 src/bin/pg_upgrade/pyt/test_007_multixact_conversion.py create mode 100644 src/bin/pg_upgrade/pyt/test_008_extension_control_path.py diff --git a/src/bin/pg_dump/meson.build b/src/bin/pg_dump/meson.build index 7c9a475963b5c..c9c8725b21331 100644 --- a/src/bin/pg_dump/meson.build +++ b/src/bin/pg_dump/meson.build @@ -89,6 +89,24 @@ tests += { 'name': 'pg_dump', 'sd': meson.current_source_dir(), 'bd': meson.current_build_dir(), + 'pytest': { + 'env': { + 'GZIP_PROGRAM': gzip.found() ? gzip.full_path() : '', + 'LZ4': program_lz4.found() ? program_lz4.full_path() : '', + 'ZSTD': program_zstd.found() ? program_zstd.full_path() : '', + 'with_icu': icu.found() ? 'yes' : 'no', + }, + 'tests': [ + 'pyt/test_001_basic.py', + 'pyt/test_002_pg_dump.py', + 'pyt/test_003_pg_dump_with_server.py', + 'pyt/test_004_pg_dump_parallel.py', + 'pyt/test_010_dump_connstr.py', + 'pyt/test_007_pg_dumpall.py', + 'pyt/test_006_pg_dump_compress.py', + 'pyt/test_005_pg_dump_filterfile.py', + ], + }, 'tap': { 'env': { 'GZIP_PROGRAM': gzip.found() ? 
def test_001_basic(pg_bin):
    """pg_dump/pg_restore command-line option validation.

    Runs the help/version/option-handling checks for pg_dump, pg_restore and
    pg_dumpall, then replays a fixed list of invalid command lines. Each
    entry is ``(argv, stderr regex, description)`` and is handed to
    ``pg_bin.command_fails_like`` in the order listed. One group of cases in
    the middle depends on whether the build defines ``HAVE_LIBZ``.
    """
    for program in ("pg_dump", "pg_restore", "pg_dumpall"):
        pg_bin.program_help_ok(program)
        pg_bin.program_version_ok(program)
        pg_bin.program_options_handling_ok(program)

    def expect_failures(cases):
        """Replay (argv, pattern, description) triples in order."""
        for argv, pattern, description in cases:
            pg_bin.command_fails_like(argv, pattern, description)

    expect_failures(
        [
            (
                ["pg_dump", "qqq", "abc"],
                r"""pg_dump:\ error:\ too\ many\ command\-line\ arguments\ \(first\ is\ "abc"\)""",
                "pg_dump: too many command-line arguments",
            ),
            (
                ["pg_restore", "qqq", "abc"],
                r"""pg_restore:\ error:\ too\ many\ command\-line\ arguments\ \(first\ is\ "abc"\)""",
                "pg_restore: too many command-line arguments",
            ),
            (
                ["pg_dumpall", "qqq", "abc"],
                r"""pg_dumpall:\ error:\ too\ many\ command\-line\ arguments\ \(first\ is\ "qqq"\)""",
                "pg_dumpall: too many command-line arguments",
            ),
            (
                ["pg_dump", "-s", "-a"],
                r"""pg_dump:\ error:\ options\ \-a/\-\-data\-only\ and\ \-s/\-\-schema\-only\ cannot\ be\ used\ together""",
                "pg_dump: options -a/--data-only and -s/--schema-only cannot be used together",
            ),
            (
                ["pg_dump", "-s", "--statistics-only"],
                r"""pg_dump:\ error:\ options\ \-s/\-\-schema\-only\ and\ \-\-statistics\-only\ cannot\ be\ used\ together""",
                "pg_dump: error: options -s/--schema-only and --statistics-only cannot be used together",
            ),
            (
                ["pg_dump", "-a", "--statistics-only"],
                r"""pg_dump:\ error:\ options\ \-a/\-\-data\-only\ and\ \-\-statistics\-only\ cannot\ be\ used\ together""",
                "pg_dump: error: options -a/--data-only and --statistics-only cannot be used together",
            ),
            (
                ["pg_dump", "-s", "--include-foreign-data=xxx"],
                r"""pg_dump:\ error:\ options\ \-\-include\-foreign\-data\ and\ \-s/\-\-schema\-only\ cannot\ be\ used\ together""",
                "pg_dump: options --include-foreign-data and -s/--schema-only cannot be used together",
            ),
            (
                ["pg_dump", "--statistics-only", "--no-statistics"],
                r"""pg_dump:\ error:\ options\ \-\-statistics\-only\ and\ \-\-no\-statistics\ cannot\ be\ used\ together""",
                "pg_dump: options --statistics-only and --no-statistics cannot be used together",
            ),
            (
                ["pg_dump", "-j2", "--include-foreign-data=xxx"],
                r"""pg_dump:\ error:\ option\ \-\-include\-foreign\-data\ is\ not\ supported\ with\ parallel\ backup""",
                "pg_dump: option --include-foreign-data is not supported with parallel backup",
            ),
            (
                ["pg_restore"],
                r"""pg_restore:\ error:\ one\ of\ \-d/\-\-dbname\ and\ \-f/\-\-file\ must\ be\ specified""",
                "pg_restore: error: one of -d/--dbname and -f/--file must be specified",
            ),
            (
                ["pg_restore", "-s", "-a", "-f -"],
                r"""pg_restore:\ error:\ options\ \-a/\-\-data\-only\ and\ \-s/\-\-schema\-only\ cannot\ be\ used\ together""",
                "pg_restore: options -a/--data-only and -s/--schema-only cannot be used together",
            ),
            (
                ["pg_restore", "-d", "xxx", "-f", "xxx"],
                r"""pg_restore:\ error:\ options\ \-d/\-\-dbname\ and\ \-f/\-\-file\ cannot\ be\ used\ together""",
                "pg_restore: options -d/--dbname and -f/--file cannot be used together",
            ),
            (
                ["pg_dump", "-c", "-a"],
                r"""pg_dump:\ error:\ options\ \-c/\-\-clean\ and\ \-a/\-\-data\-only\ cannot\ be\ used\ together""",
                "pg_dump: options -c/--clean and -a/--data-only cannot be used together",
            ),
            (
                ["pg_dumpall", "-c", "-a"],
                r"""pg_dumpall:\ error:\ options\ \-c/\-\-clean\ and\ \-a/\-\-data\-only\ cannot\ be\ used\ together""",
                "pg_dumpall: options -c/--clean and -a/--data-only cannot be used together",
            ),
            (
                ["pg_restore", "-c", "-a", "-f -"],
                r"""pg_restore:\ error:\ options\ \-c/\-\-clean\ and\ \-a/\-\-data\-only\ cannot\ be\ used\ together""",
                "pg_restore: options -c/--clean and -a/--data-only cannot be used together",
            ),
            (
                ["pg_dump", "--if-exists"],
                r"""pg_dump:\ error:\ option\ \-\-if\-exists\ requires\ option\ \-c/\-\-clean""",
                "pg_dump: option --if-exists requires option -c/--clean",
            ),
            (
                ["pg_dump", "-j3"],
                r"""pg_dump:\ error:\ parallel\ backup\ only\ supported\ by\ the\ directory\ format""",
                "pg_dump: parallel backup only supported by the directory format",
            ),
            (
                ["pg_dump", "-j", "-1 "],
                r"""pg_dump:\ error:\ \-j/\-\-jobs\ must\ be\ in\ range""",
                "pg_dump: -j/--jobs must be in range",
            ),
            (
                ["pg_dump", "-F", "garbage"],
                r"""pg_dump:\ error:\ invalid\ output\ format""",
                "pg_dump: invalid output format",
            ),
            (
                ["pg_restore", "-j", "-1", "-f -"],
                r"""pg_restore:\ error:\ \-j/\-\-jobs\ must\ be\ in\ range""",
                "pg_restore: -j/--jobs must be in range",
            ),
            (
                ["pg_restore", "--single-transaction", "-j3", "-f -"],
                r"""pg_restore:\ error:\ cannot\ specify\ both\ \-\-single\-transaction\ and\ multiple\ jobs""",
                "pg_restore: cannot specify both --single-transaction and multiple jobs",
            ),
            (
                ["pg_dump", "--compress", "garbage"],
                r"""pg_dump:\ error:\ unrecognized\ compression\ algorithm""",
                "pg_dump: invalid --compress",
            ),
            (
                ["pg_dump", "--compress", "none:1"],
                r"""pg_dump:\ error:\ invalid\ compression\ specification:\ compression\ algorithm\ "none"\ does\ not\ accept\ a\ compression\ level""",
                'pg_dump: invalid compression specification: compression algorithm "none" does not accept a compression level',
            ),
        ]
    )

    # The expected diagnostics differ depending on zlib support in the build.
    if pg_bin.check_pg_config("#define HAVE_LIBZ 1"):
        zlib_dependent_cases = [
            (
                ["pg_dump", "-Z", "15"],
                r"""pg_dump:\ error:\ invalid\ compression\ specification:\ compression\ algorithm\ "gzip"\ expects\ a\ compression\ level\ between\ 1\ and\ 9\ \(default\ at\ \-1\)""",
                "pg_dump: invalid compression specification: must be in range",
            ),
            (
                ["pg_dump", "--compress", "1", "--format", "tar"],
                r"""pg_dump:\ error:\ compression\ is\ not\ supported\ by\ tar\ archive\ format""",
                "pg_dump: compression is not supported by tar archive format",
            ),
            (
                ["pg_dump", "-Z", "gzip:nonInt"],
                r'''pg_dump:\ error:\ invalid\ compression\ specification:\ unrecognized\ compression\ option:\ "nonInt"''',
                "pg_dump: invalid compression specification: must be an integer",
            ),
        ]
    else:
        zlib_dependent_cases = [
            (
                ["pg_dump", "--format", "tar", "-j3"],
                r"""pg_dump:\ error:\ parallel\ backup\ only\ supported\ by\ the\ directory\ format""",
                "pg_dump: warning: parallel backup not supported by tar format",
            ),
            (
                ["pg_dump", "-Z", "gzip:nonInt", "--format", "tar", "-j2"],
                r"""pg_dump:\ error:\ invalid\ compression\ specification:\ unrecognized\ compression\ option""",
                "pg_dump: invalid compression specification: must be an integer",
            ),
        ]
    expect_failures(zlib_dependent_cases)

    expect_failures(
        [
            (
                ["pg_dump", "--extra-float-digits", "-16"],
                r"""pg_dump:\ error:\ \-\-extra\-float\-digits\ must\ be\ in\ range""",
                "pg_dump: --extra-float-digits must be in range",
            ),
            (
                ["pg_dump", "--rows-per-insert", "0"],
                r"""pg_dump:\ error:\ \-\-rows\-per\-insert\ must\ be\ in\ range""",
                "pg_dump: --rows-per-insert must be in range",
            ),
            (
                ["pg_restore", "--if-exists", "-f -"],
                r"""pg_restore:\ error:\ option\ \-\-if\-exists\ requires\ option\ \-c/\-\-clean""",
                "pg_restore: option --if-exists requires option -c/--clean",
            ),
            (
                ["pg_restore", "-f -", "-F", "garbage"],
                r"""pg_restore:\ error:\ unrecognized\ archive\ format\ "garbage";""",
                "pg_restore: unrecognized archive format",
            ),
            (
                ["pg_restore", "-f -", "-F", ""],
                r"""pg_restore:\ error:\ unrecognized\ archive\ format\ "";""",
                "pg_restore: empty archive format",
            ),
            (
                ["pg_dump", "--on-conflict-do-nothing"],
                r"""pg_dump: error: option --on-conflict-do-nothing requires option --inserts, --rows-per-insert, or --column-inserts""",
                "pg_dump: --on-conflict-do-nothing requires --inserts, --rows-per-insert, --column-inserts",
            ),
            (
                ["pg_dumpall", "-g", "-r"],
                r"""pg_dumpall:\ error:\ options\ \-g/\-\-globals\-only\ and\ \-r/\-\-roles\-only\ cannot\ be\ used\ together""",
                "pg_dumpall: options -g/--globals-only and -r/--roles-only cannot be used together",
            ),
            (
                ["pg_dumpall", "-g", "-t"],
                r"""pg_dumpall:\ error:\ options\ \-g/\-\-globals\-only\ and\ \-t/\-\-tablespaces\-only\ cannot\ be\ used\ together""",
                "pg_dumpall: options -g/--globals-only and -t/--tablespaces-only cannot be used together",
            ),
            (
                ["pg_dumpall", "-r", "-t"],
                r"""pg_dumpall:\ error:\ options\ \-r/\-\-roles\-only\ and\ \-t/\-\-tablespaces\-only\ cannot\ be\ used\ together""",
                "pg_dumpall: options -r/--roles-only and -t/--tablespaces-only cannot be used together",
            ),
            (
                ["pg_dumpall", "--if-exists"],
                r"""pg_dumpall:\ error:\ option\ \-\-if\-exists\ requires\ option\ \-c/\-\-clean""",
                "pg_dumpall: option --if-exists requires option -c/--clean",
            ),
            (
                ["pg_restore", "-C", "-1", "-f -"],
                r"""pg_restore:\ error:\ options\ \-C/\-\-create\ and\ \-1/\-\-single\-transaction\ cannot\ be\ used\ together""",
                "pg_restore: options -C\\/--create and -1\\/--single-transaction cannot be used together",
            ),
            (
                ["pg_dumpall", "--exclude-database=foo", "--globals-only"],
                r"""pg_dumpall:\ error:\ options\ \-\-exclude\-database\ and\ \-g/\-\-globals\-only\ cannot\ be\ used\ together""",
                "pg_dumpall: options --exclude-database and -g/--globals-only cannot be used together",
            ),
            (
                ["pg_dumpall", "-a", "--no-data"],
                r"""pg_dumpall:\ error:\ options\ \-a/\-\-data\-only\ and\ \-\-no\-data\ cannot\ be\ used\ together""",
                "pg_dumpall: options -a\\/--data-only and --no-data cannot be used together",
            ),
            (
                ["pg_dumpall", "-s", "--no-schema"],
                r"""pg_dumpall:\ error:\ options\ \-s/\-\-schema\-only\ and\ \-\-no\-schema\ cannot\ be\ used\ together""",
                "pg_dumpall: options -s\\/--schema-only and --no-schema cannot be used together",
            ),
            (
                ["pg_dumpall", "--statistics-only", "--no-statistics"],
                r"""pg_dumpall:\ error:\ options\ \-\-statistics\-only\ and\ \-\-no\-statistics\ cannot\ be\ used\ together""",
                "pg_dumpall: options --statistics-only and --no-statistics cannot be used together",
            ),
            (
                ["pg_dumpall", "--statistics", "--no-statistics"],
                r"""pg_dumpall:\ error:\ options\ \-\-statistics\ and\ \-\-no\-statistics\ cannot\ be\ used\ together""",
                "pg_dumpall: options --statistics-only and --no-statistics cannot be used together",
            ),
            (
                ["pg_dumpall", "--statistics", "--tablespaces-only"],
                r"""pg_dumpall:\ error:\ options\ \-\-statistics\ and\ \-t/\-\-tablespaces\-only\ cannot\ be\ used\ together""",
                "pg_dumpall: options --statistics and -t\\/--tablespaces-only cannot be used together",
            ),
            (
                ["pg_dumpall", "--format", "x"],
                r"""pg_dumpall:\ error:\ unrecognized\ output\ format\ "x";""",
                "pg_dumpall: unrecognized output format",
            ),
            (
                ["pg_dumpall", "--format", "d", "--restrict-key=uu", "-f dumpfile"],
                r"""pg_dumpall:\ error:\ option\ \-\-restrict\-key\ can\ only\ be\ used\ with\ \-\-format=plain""",
                "pg_dumpall: --restrict-key can only be used with plain dump format",
            ),
            (
                ["pg_dumpall", "--format", "d", "--globals-only", "--clean", "-f", "dumpfile"],
                r"""pg_dumpall:\ error:\ options\ \-\-clean\ and\ \-g/\-\-globals\-only\ cannot\ be\ used\ together\ in\ non\-text\ dump""",
                "pg_dumpall: --clean and -g/--globals-only cannot be used together in non-text dump",
            ),
            (
                ["pg_dumpall", "--format", "d"],
                r"""pg_dumpall:\ error:\ option\ \-F/\-\-format=d\|c\|t\ requires\ option\ \-f/\-\-file""",
                "pg_dumpall: non-plain format requires --file option",
            ),
            (
                ["pg_restore", "--exclude-database=foo", "--globals-only", "-d", "xxx"],
                r"""pg_restore:\ error:\ options\ \-\-exclude\-database\ and\ \-g/\-\-globals\-only\ cannot\ be\ used\ together""",
                "pg_restore: options --exclude-database and -g/--globals-only cannot be used together",
            ),
            (
                ["pg_restore", "--data-only", "--globals-only", "-d", "xxx"],
                r"""pg_restore:\ error:\ options\ \-a/\-\-data\-only\ and\ \-g/\-\-globals\-only\ cannot\ be\ used\ together""",
                "pg_restore: error: options -a/--data-only and -g/--globals-only cannot be used together",
            ),
            (
                ["pg_restore", "--schema-only", "--globals-only", "-d", "xxx"],
                r"""pg_restore:\ error:\ options\ \-g/\-\-globals\-only\ and\ \-s/\-\-schema\-only\ cannot\ be\ used\ together""",
                "pg_restore: error: options -g/--globals-only and -s/--schema-only cannot be used together",
            ),
            (
                ["pg_restore", "--statistics-only", "--globals-only", "-d", "xxx"],
                r"""pg_restore:\ error:\ options\ \-g/\-\-globals\-only\ and\ \-\-statistics\-only\ cannot\ be\ used\ together""",
                "pg_restore: error: options -g/--globals-only and --statistics-only cannot be used together",
            ),
            (
                ["pg_restore", "--exclude-database=foo", "-d", "xxx", "dumpdir"],
                r"""pg_restore:\ error:\ option\ \-\-exclude\-database\ can\ be\ used\ only\ when\ restoring\ an\ archive\ created\ by\ pg_dumpall""",
                "When option --exclude-database is used in pg_restore with dump of pg_dump",
            ),
            (
                ["pg_restore", "--globals-only", "-d", "xxx", "dumpdir"],
                r"""pg_restore:\ error:\ option\ \-g/\-\-globals\-only\ can\ be\ used\ only\ when\ restoring\ an\ archive\ created\ by\ pg_dumpall""",
                "When option --globals-only is used in pg_restore with the dump of pg_dump",
            ),
            (
                ["pg_restore", "--globals-only", "--no-globals", "-d", "xxx", "dumpdir"],
                r"""pg_restore:\ error:\ options\ \-g/\-\-globals\-only\ and\ \-\-no\-globals\ cannot\ be\ used\ together""",
                "options --no-globals and --globals-only cannot be used together",
            ),
        ]
    )
A large set of +named dump runs (full dumps, section/schema/table/data-only dumps, format + +restore round-trips, exclude/only variants, pg_dumpall globals/dbprivs, +statistics import, ...) is executed against a single seeded server. Each named +test owns a regexp plus 'like'/'unlike' membership keyed by run (or test_key); +for every run the test's regexp must match the dump output iff the run is a +'like' and not an 'unlike'. + +Faithful transcription of the Perl original: %pgdump_runs, %dump_test_schema_runs, +%full_runs, %tests and the driver are reproduced below. Regexps preserve the +Perl /xm (and /xms, /m, /s) semantics via re.VERBOSE | re.MULTILINE (and +re.DOTALL where /s applies). The Perl \\Q...\\E quotemeta blocks are expanded +with re.escape (which also escapes spaces, so they survive VERBOSE mode). +""" + +import glob +import os +import re +import tempfile +from typing import Dict, List, Optional, Pattern, Tuple + +import pypg + +XM = re.VERBOSE | re.MULTILINE +XMS = re.VERBOSE | re.MULTILINE | re.DOTALL + +# Each regexp is built from a sequence of segments. A ("lit", text) segment is +# a Perl \Q...\E quotemeta literal (re.escape, which also escapes spaces so it +# survives VERBOSE mode); an ("rx", raw) segment is verbatim regex syntax. +_Segment = Tuple[str, str] + + +def _qr(parts: List[_Segment], flags: int) -> Pattern[str]: + """Compile a Perl-style qr/.../ from literal/regex segments.""" + pieces = [] + for kind, val in parts: + pieces.append(re.escape(val) if kind == "lit" else val) + return re.compile("".join(pieces), flags) + + +# --------------------------------------------------------------------------- +# Definition of the pg_dump runs to make. Mirrors %pgdump_runs. 
+# +# Each entry maps a run name to a dict with: 'dump_cmd' (argv, with $tempdir +# placeholders resolved at runtime), optional 'restore_cmd', optional +# 'test_key' (reuse another run's like/unlike set), optional 'database' (the +# database the run dumps from, default 'postgres'), optional 'command_like' +# (run a side command and assert its stdout matches), optional 'glob_patterns' +# (files that must exist after the dump). +# --------------------------------------------------------------------------- + + +def _pgdump_runs(tempdir: str, supports_gzip: bool) -> Dict[str, dict]: + """Build the run matrix with $tempdir paths resolved (mirrors %pgdump_runs).""" + return { + "binary_upgrade": { + "dump_cmd": [ + "pg_dump", + "--no-sync", + "--format", + "custom", + "--file", + f"{tempdir}/binary_upgrade.dump", + "--no-password", + "--no-data", + "--sequence-data", + "--binary-upgrade", + "--statistics", + "--dbname", + "postgres", + ], + "restore_cmd": [ + "pg_restore", + "--format", + "custom", + "--verbose", + "--file", + f"{tempdir}/binary_upgrade.sql", + "--statistics", + f"{tempdir}/binary_upgrade.dump", + ], + }, + "clean": { + "dump_cmd": [ + "pg_dump", + "--no-sync", + "--file", + f"{tempdir}/clean.sql", + "--clean", + "--statistics", + "--dbname", + "postgres", + ], + }, + "clean_if_exists": { + "dump_cmd": [ + "pg_dump", + "--no-sync", + "--file", + f"{tempdir}/clean_if_exists.sql", + "--clean", + "--if-exists", + "--encoding", + "UTF8", + "--statistics", + "postgres", + ], + }, + "column_inserts": { + "dump_cmd": [ + "pg_dump", + "--no-sync", + "--file", + f"{tempdir}/column_inserts.sql", + "--data-only", + "--column-inserts", + "postgres", + ], + }, + "createdb": { + "dump_cmd": [ + "pg_dump", + "--no-sync", + "--file", + f"{tempdir}/createdb.sql", + "--create", + "--no-reconnect", + "--verbose", + "--statistics", + "postgres", + ], + }, + "data_only": { + "dump_cmd": [ + "pg_dump", + "--no-sync", + "--file", + f"{tempdir}/data_only.sql", + "--data-only", 
+ "--superuser", + "test_superuser", + "--disable-triggers", + "--verbose", + "postgres", + ], + }, + "defaults": { + "dump_cmd": [ + "pg_dump", + "--no-sync", + "--file", + f"{tempdir}/defaults.sql", + "--statistics", + "postgres", + ], + }, + "defaults_no_public": { + "database": "regress_pg_dump_test", + "dump_cmd": [ + "pg_dump", + "--no-sync", + "--file", + f"{tempdir}/defaults_no_public.sql", + "--statistics", + "regress_pg_dump_test", + ], + }, + "defaults_no_public_clean": { + "database": "regress_pg_dump_test", + "dump_cmd": [ + "pg_dump", + "--no-sync", + "--clean", + "--file", + f"{tempdir}/defaults_no_public_clean.sql", + "--statistics", + "regress_pg_dump_test", + ], + }, + "defaults_public_owner": { + "database": "regress_public_owner", + "dump_cmd": [ + "pg_dump", + "--no-sync", + "--file", + f"{tempdir}/defaults_public_owner.sql", + "--statistics", + "regress_public_owner", + ], + }, + # Do not use --no-sync to give test coverage for data sync. + # By default, the custom format compresses its data file + # when the code is compiled with gzip support, and lets them + # uncompressed when not compiled with it. + "defaults_custom_format": { + "test_key": "defaults", + "dump_cmd": [ + "pg_dump", + "--format", + "custom", + "--file", + f"{tempdir}/defaults_custom_format.dump", + "--statistics", + "postgres", + ], + "restore_cmd": [ + "pg_restore", + "--format", + "custom", + "--file", + f"{tempdir}/defaults_custom_format.sql", + "--statistics", + f"{tempdir}/defaults_custom_format.dump", + ], + "command_like": { + "command": [ + "pg_restore", + "--list", + f"{tempdir}/defaults_custom_format.dump", + ], + "expected": ( + re.compile(r"Compression:\ gzip", re.VERBOSE) + if supports_gzip + else re.compile(r"Compression:\ none", re.VERBOSE) + ), + "name": "data content is gzip-compressed by default if available", + }, + }, + # Do not use --no-sync to give test coverage for data sync. 
+ # By default, the directory format compresses its data files + # when the code is compiled with gzip support, and lets them + # uncompressed when not compiled with it. + "defaults_dir_format": { + "test_key": "defaults", + "dump_cmd": [ + "pg_dump", + "--format", + "directory", + "--file", + f"{tempdir}/defaults_dir_format", + "--statistics", + "postgres", + ], + "restore_cmd": [ + "pg_restore", + "--format", + "directory", + "--file", + f"{tempdir}/defaults_dir_format.sql", + "--statistics", + f"{tempdir}/defaults_dir_format", + ], + "command_like": { + "command": [ + "pg_restore", + "--list", + f"{tempdir}/defaults_dir_format", + ], + "expected": ( + re.compile(r"Compression:\ gzip", re.VERBOSE) + if supports_gzip + else re.compile(r"Compression:\ none", re.VERBOSE) + ), + "name": "data content is gzip-compressed by default", + }, + "glob_patterns": [ + f"{tempdir}/defaults_dir_format/toc.dat", + f"{tempdir}/defaults_dir_format/blobs_*.toc", + ( + f"{tempdir}/defaults_dir_format/*.dat.gz" + if supports_gzip + else f"{tempdir}/defaults_dir_format/*.dat" + ), + ], + }, + # Do not use --no-sync to give test coverage for data sync. + "defaults_parallel": { + "test_key": "defaults", + "dump_cmd": [ + "pg_dump", + "--format", + "directory", + "--jobs", + "2", + "--file", + f"{tempdir}/defaults_parallel", + "--statistics", + "postgres", + ], + "restore_cmd": [ + "pg_restore", + "--file", + f"{tempdir}/defaults_parallel.sql", + "--statistics", + f"{tempdir}/defaults_parallel", + ], + }, + # Do not use --no-sync to give test coverage for data sync. 
+ "defaults_tar_format": { + "test_key": "defaults", + "dump_cmd": [ + "pg_dump", + "--format", + "tar", + "--file", + f"{tempdir}/defaults_tar_format.tar", + "--statistics", + "postgres", + ], + "restore_cmd": [ + "pg_restore", + "--format", + "tar", + "--file", + f"{tempdir}/defaults_tar_format.sql", + "--statistics", + f"{tempdir}/defaults_tar_format.tar", + ], + }, + "exclude_dump_test_schema": { + "dump_cmd": [ + "pg_dump", + "--no-sync", + "--file", + f"{tempdir}/exclude_dump_test_schema.sql", + "--exclude-schema", + "dump_test", + "--statistics", + "postgres", + ], + }, + "exclude_test_table": { + "dump_cmd": [ + "pg_dump", + "--no-sync", + "--file", + f"{tempdir}/exclude_test_table.sql", + "--exclude-table", + "dump_test.test_table", + "--statistics", + "postgres", + ], + }, + "exclude_measurement": { + "dump_cmd": [ + "pg_dump", + "--no-sync", + "--file", + f"{tempdir}/exclude_measurement.sql", + "--exclude-table-and-children", + "dump_test.measurement", + "--statistics", + "postgres", + ], + }, + "exclude_measurement_data": { + "dump_cmd": [ + "pg_dump", + "--no-sync", + "--file", + f"{tempdir}/exclude_measurement_data.sql", + "--exclude-table-data-and-children", + "dump_test.measurement", + "--no-unlogged-table-data", + "--statistics", + "postgres", + ], + }, + "exclude_test_table_data": { + "dump_cmd": [ + "pg_dump", + "--no-sync", + "--file", + f"{tempdir}/exclude_test_table_data.sql", + "--exclude-table-data", + "dump_test.test_table", + "--no-unlogged-table-data", + "--statistics", + "postgres", + ], + }, + "inserts": { + "dump_cmd": [ + "pg_dump", + "--no-sync", + "--file", + f"{tempdir}/inserts.sql", + "--data-only", + "--inserts", + "postgres", + ], + }, + "pg_dumpall_globals": { + "dump_cmd": [ + "pg_dumpall", + "--verbose", + "--file", + f"{tempdir}/pg_dumpall_globals.sql", + "--globals-only", + "--no-sync", + ], + }, + "pg_dumpall_globals_clean": { + "dump_cmd": [ + "pg_dumpall", + "--file", + f"{tempdir}/pg_dumpall_globals_clean.sql", + 
"--globals-only", + "--clean", + "--no-sync", + ], + }, + "pg_dumpall_dbprivs": { + "dump_cmd": [ + "pg_dumpall", + "--no-sync", + "--file", + f"{tempdir}/pg_dumpall_dbprivs.sql", + "--statistics", + ], + }, + "pg_dumpall_exclude": { + "dump_cmd": [ + "pg_dumpall", + "--verbose", + "--file", + f"{tempdir}/pg_dumpall_exclude.sql", + "--exclude-database", + "*dump_test*", + "--no-sync", + "--statistics", + ], + }, + "no_toast_compression": { + "dump_cmd": [ + "pg_dump", + "--no-sync", + "--file", + f"{tempdir}/no_toast_compression.sql", + "--no-toast-compression", + "--statistics", + "postgres", + ], + }, + "no_large_objects": { + "dump_cmd": [ + "pg_dump", + "--no-sync", + "--file", + f"{tempdir}/no_large_objects.sql", + "--no-large-objects", + "--statistics", + "postgres", + ], + }, + "no_policies": { + "dump_cmd": [ + "pg_dump", + "--no-sync", + "--file", + f"{tempdir}/no_policies.sql", + "--no-policies", + "--statistics", + "postgres", + ], + }, + "no_policies_restore": { + "dump_cmd": [ + "pg_dump", + "--no-sync", + "--format", + "custom", + "--file", + f"{tempdir}/no_policies_restore.dump", + "--statistics", + "postgres", + ], + "restore_cmd": [ + "pg_restore", + "--format", + "custom", + "--file", + f"{tempdir}/no_policies_restore.sql", + "--no-policies", + "--statistics", + f"{tempdir}/no_policies_restore.dump", + ], + }, + "no_privs": { + "dump_cmd": [ + "pg_dump", + "--no-sync", + "--file", + f"{tempdir}/no_privs.sql", + "--no-privileges", + "--statistics", + "postgres", + ], + }, + "no_owner": { + "dump_cmd": [ + "pg_dump", + "--no-sync", + "--file", + f"{tempdir}/no_owner.sql", + "--no-owner", + "--statistics", + "postgres", + ], + }, + "no_subscriptions": { + "dump_cmd": [ + "pg_dump", + "--no-sync", + "--file", + f"{tempdir}/no_subscriptions.sql", + "--no-subscriptions", + "--statistics", + "postgres", + ], + }, + "no_subscriptions_restore": { + "dump_cmd": [ + "pg_dump", + "--no-sync", + "--format", + "custom", + "--file", + 
f"{tempdir}/no_subscriptions_restore.dump", + "--statistics", + "postgres", + ], + "restore_cmd": [ + "pg_restore", + "--format", + "custom", + "--file", + f"{tempdir}/no_subscriptions_restore.sql", + "--no-subscriptions", + "--statistics", + f"{tempdir}/no_subscriptions_restore.dump", + ], + }, + "no_table_access_method": { + "dump_cmd": [ + "pg_dump", + "--no-sync", + "--file", + f"{tempdir}/no_table_access_method.sql", + "--no-table-access-method", + "--statistics", + "postgres", + ], + }, + "only_dump_test_schema": { + "dump_cmd": [ + "pg_dump", + "--no-sync", + "--file", + f"{tempdir}/only_dump_test_schema.sql", + "--schema", + "dump_test", + "--statistics", + "postgres", + ], + }, + "only_dump_test_table": { + "dump_cmd": [ + "pg_dump", + "--no-sync", + "--file", + f"{tempdir}/only_dump_test_table.sql", + "--table", + "dump_test.test_table", + "--lock-wait-timeout", + str(1000 * pypg.test_timeout_default()), + "--statistics", + "postgres", + ], + }, + "only_dump_measurement": { + "dump_cmd": [ + "pg_dump", + "--no-sync", + "--file", + f"{tempdir}/only_dump_measurement.sql", + "--table-and-children", + "dump_test.measurement", + "--lock-wait-timeout", + str(1000 * pypg.test_timeout_default()), + "--statistics", + "postgres", + ], + }, + "role": { + "dump_cmd": [ + "pg_dump", + "--no-sync", + "--file", + f"{tempdir}/role.sql", + "--role", + "regress_dump_test_role", + "--schema", + "dump_test_second_schema", + "--statistics", + "postgres", + ], + }, + "role_parallel": { + "test_key": "role", + "dump_cmd": [ + "pg_dump", + "--no-sync", + "--format", + "directory", + "--jobs", + "2", + "--file", + f"{tempdir}/role_parallel", + "--role", + "regress_dump_test_role", + "--schema", + "dump_test_second_schema", + "--statistics", + "postgres", + ], + "restore_cmd": [ + "pg_restore", + "--file", + f"{tempdir}/role_parallel.sql", + "--statistics", + f"{tempdir}/role_parallel", + ], + }, + "rows_per_insert": { + "dump_cmd": [ + "pg_dump", + "--no-sync", + "--file", + 
f"{tempdir}/rows_per_insert.sql", + "--data-only", + "--rows-per-insert", + "4", + "--table", + "dump_test.test_table", + "--table", + "dump_test.test_fourth_table", + "postgres", + ], + }, + "schema_only": { + "dump_cmd": [ + "pg_dump", + "--no-sync", + "--format", + "plain", + "--file", + f"{tempdir}/schema_only.sql", + "--schema-only", + "postgres", + ], + }, + "section_pre_data": { + "dump_cmd": [ + "pg_dump", + "--no-sync", + "--file", + f"{tempdir}/section_pre_data.sql", + "--section", + "pre-data", + "--statistics", + "postgres", + ], + }, + "section_data": { + "dump_cmd": [ + "pg_dump", + "--no-sync", + "--file", + f"{tempdir}/section_data.sql", + "--section", + "data", + "--statistics", + "postgres", + ], + }, + "section_post_data": { + "dump_cmd": [ + "pg_dump", + "--no-sync", + "--file", + f"{tempdir}/section_post_data.sql", + "--section", + "post-data", + "--statistics", + "postgres", + ], + }, + "test_schema_plus_large_objects": { + "dump_cmd": [ + "pg_dump", + "--no-sync", + "--file", + f"{tempdir}/test_schema_plus_large_objects.sql", + "--schema", + "dump_test", + "--large-objects", + "--no-large-objects", + "--statistics", + "postgres", + ], + }, + "no_statistics": { + "dump_cmd": [ + "pg_dump", + "--no-sync", + f"--file={tempdir}/no_statistics.sql", + "--no-statistics", + "postgres", + ], + }, + "no_data_no_schema": { + "dump_cmd": [ + "pg_dump", + "--no-sync", + f"--file={tempdir}/no_data_no_schema.sql", + "--no-data", + "--no-schema", + "postgres", + "--statistics", + ], + }, + "statistics_only": { + "dump_cmd": [ + "pg_dump", + "--no-sync", + f"--file={tempdir}/statistics_only.sql", + "--statistics-only", + "postgres", + ], + }, + "no_schema": { + "dump_cmd": [ + "pg_dump", + "--no-sync", + f"--file={tempdir}/no_schema.sql", + "--no-schema", + "--statistics", + "postgres", + ], + }, + } + + +# Tests which target the 'dump_test' schema, specifically. +# Mirrors %dump_test_schema_runs. 
+DUMP_TEST_SCHEMA_RUNS: Dict[str, int] = { + "only_dump_test_schema": 1, + "only_dump_measurement": 1, + "test_schema_plus_large_objects": 1, +} + +# Tests which are considered 'full' dumps by pg_dump, but there are flags used +# to exclude specific items (ACLs, LOs, etc). Mirrors %full_runs. +# +# Note: 'schema_only_with_statistics' is not an actual run; it appears here (and +# in many 'unlike' sets) only as a membership marker so the like/unlike +# bookkeeping matches the Perl original exactly. +FULL_RUNS: Dict[str, int] = { + "binary_upgrade": 1, + "clean": 1, + "clean_if_exists": 1, + "createdb": 1, + "defaults": 1, + "exclude_dump_test_schema": 1, + "exclude_test_table": 1, + "exclude_test_table_data": 1, + "exclude_measurement": 1, + "exclude_measurement_data": 1, + "no_toast_compression": 1, + "no_large_objects": 1, + "no_owner": 1, + "no_policies": 1, + "no_policies_restore": 1, + "no_privs": 1, + "no_statistics": 1, + "no_subscriptions": 1, + "no_subscriptions_restore": 1, + "no_table_access_method": 1, + "pg_dumpall_dbprivs": 1, + "pg_dumpall_exclude": 1, + "schema_only": 1, + "schema_only_with_statistics": 1, +} + + +def _full() -> Dict[str, int]: + """A fresh copy of FULL_RUNS for merging into a test's 'like'.""" + return dict(FULL_RUNS) + + +def _dts() -> Dict[str, int]: + """A fresh copy of DUMP_TEST_SCHEMA_RUNS for merging into a test's 'like'.""" + return dict(DUMP_TEST_SCHEMA_RUNS) + + +# --------------------------------------------------------------------------- +# Definition of the tests to run. Mirrors %tests. +# +# Each entry maps a test name (also the log message) to a dict with: 'regexp' +# (compiled), 'like'/'unlike' dicts keyed by run-name or test_key, optional +# 'all_runs', optional 'create_order' (int) + 'create_sql' (run during setup, +# ordered by it), optional 'database', and optional 'collation'/'icu' gating. 
+# --------------------------------------------------------------------------- + + +def _tests() -> Dict[str, dict]: # pylint: disable=too-many-statements + """Build the test matrix (mirrors %tests).""" + tests: Dict[str, dict] = {} + + tests["restrict"] = { + "all_runs": 1, + "regexp": re.compile(r"^\\restrict [a-zA-Z0-9]+$", re.MULTILINE), + } + tests["unrestrict"] = { + "all_runs": 1, + "regexp": re.compile(r"^\\unrestrict [a-zA-Z0-9]+$", re.MULTILINE), + } + tests["ALTER DEFAULT PRIVILEGES FOR ROLE regress_dump_test_role GRANT"] = { + "create_order": 14, + "create_sql": "ALTER DEFAULT PRIVILEGES\n" + "\t\t\t\t\t FOR ROLE regress_dump_test_role IN SCHEMA dump_test\n" + "\t\t\t\t\t GRANT SELECT ON TABLES TO regress_dump_test_role;", + "regexp": _qr( + [ + ("rx", r"^"), + ("lit", "ALTER DEFAULT PRIVILEGES "), + ("lit", "FOR ROLE regress_dump_test_role IN SCHEMA dump_test "), + ("lit", "GRANT SELECT ON TABLES TO regress_dump_test_role;"), + ], + XM, + ), + "like": {**_full(), **_dts(), "section_post_data": 1}, + "unlike": { + "exclude_dump_test_schema": 1, + "no_privs": 1, + "only_dump_measurement": 1, + }, + } + tests[ + "ALTER DEFAULT PRIVILEGES FOR ROLE regress_dump_test_role GRANT EXECUTE ON FUNCTIONS" + ] = { + "create_order": 15, + "create_sql": "ALTER DEFAULT PRIVILEGES\n" + "\t\t\t\t\t FOR ROLE regress_dump_test_role IN SCHEMA dump_test\n" + "\t\t\t\t\t GRANT EXECUTE ON FUNCTIONS TO regress_dump_test_role;", + "regexp": _qr( + [ + ("rx", r"^"), + ("lit", "ALTER DEFAULT PRIVILEGES "), + ("lit", "FOR ROLE regress_dump_test_role IN SCHEMA dump_test "), + ("lit", "GRANT ALL ON FUNCTIONS TO regress_dump_test_role;"), + ], + XM, + ), + "like": {**_full(), **_dts(), "section_post_data": 1}, + "unlike": { + "exclude_dump_test_schema": 1, + "no_privs": 1, + "only_dump_measurement": 1, + }, + } + tests["ALTER DEFAULT PRIVILEGES FOR ROLE regress_dump_test_role REVOKE"] = { + "create_order": 55, + "create_sql": "ALTER DEFAULT PRIVILEGES\n" + "\t\t\t\t\t FOR ROLE 
regress_dump_test_role\n" + "\t\t\t\t\t REVOKE EXECUTE ON FUNCTIONS FROM PUBLIC;", + "regexp": _qr( + [ + ("rx", r"^"), + ("lit", "ALTER DEFAULT PRIVILEGES "), + ("lit", "FOR ROLE regress_dump_test_role "), + ("lit", "REVOKE ALL ON FUNCTIONS FROM PUBLIC;"), + ], + XM, + ), + "like": {**_full(), "section_post_data": 1}, + "unlike": {"no_privs": 1}, + } + tests["ALTER DEFAULT PRIVILEGES FOR ROLE regress_dump_test_role REVOKE SELECT"] = { + "create_order": 56, + "create_sql": "ALTER DEFAULT PRIVILEGES\n" + "\t\t\t\t\t FOR ROLE regress_dump_test_role\n" + "\t\t\t\t\t REVOKE SELECT ON TABLES FROM regress_dump_test_role;", + "regexp": _qr( + [ + ("rx", r"^"), + ("lit", "ALTER DEFAULT PRIVILEGES "), + ("lit", "FOR ROLE regress_dump_test_role "), + ("lit", "REVOKE ALL ON TABLES FROM regress_dump_test_role;"), + ("rx", r"\n"), + ("lit", "ALTER DEFAULT PRIVILEGES "), + ("lit", "FOR ROLE regress_dump_test_role "), + ( + "lit", + "GRANT INSERT,REFERENCES,DELETE,TRIGGER,TRUNCATE,MAINTAIN,UPDATE ON TABLES TO regress_dump_test_role;", + ), + ], + XM, + ), + "like": {**_full(), "section_post_data": 1}, + "unlike": {"no_privs": 1}, + } + tests["ALTER ROLE regress_dump_test_role"] = { + "regexp": _qr( + [ + ("rx", r"^"), + ("lit", "ALTER ROLE regress_dump_test_role WITH "), + ("lit", "NOSUPERUSER INHERIT NOCREATEROLE NOCREATEDB NOLOGIN "), + ("lit", "NOREPLICATION NOBYPASSRLS;"), + ], + XM, + ), + "like": { + "pg_dumpall_dbprivs": 1, + "pg_dumpall_globals": 1, + "pg_dumpall_globals_clean": 1, + "pg_dumpall_exclude": 1, + }, + } + tests["ALTER COLLATION test0 OWNER TO"] = { + "regexp": _qr( + [ + ("rx", r"^"), + ("lit", "ALTER COLLATION public.test0 OWNER TO "), + ("rx", r".+;"), + ], + re.MULTILINE, + ), + "collation": 1, + "like": {**_full(), "section_pre_data": 1}, + "unlike": {"no_owner": 1}, + } + tests["ALTER FOREIGN DATA WRAPPER dummy OWNER TO"] = { + "regexp": re.compile( + r"^ALTER FOREIGN DATA WRAPPER dummy OWNER TO .+;", re.MULTILINE + ), + "like": {**_full(), 
"section_pre_data": 1}, + "unlike": {"no_owner": 1}, + } + tests["ALTER SERVER s1 OWNER TO"] = { + "regexp": re.compile(r"^ALTER SERVER s1 OWNER TO .+;", re.MULTILINE), + "like": {**_full(), "section_pre_data": 1}, + "unlike": {"no_owner": 1}, + } + tests["ALTER FUNCTION dump_test.pltestlang_call_handler() OWNER TO"] = { + "regexp": _qr( + [ + ("rx", r"^"), + ("lit", "ALTER FUNCTION dump_test.pltestlang_call_handler() "), + ("lit", "OWNER TO "), + ("rx", r".+;"), + ], + XM, + ), + "like": {**_full(), **_dts(), "section_pre_data": 1}, + "unlike": { + "exclude_dump_test_schema": 1, + "no_owner": 1, + "only_dump_measurement": 1, + }, + } + tests["ALTER OPERATOR FAMILY dump_test.op_family OWNER TO"] = { + "regexp": _qr( + [ + ("rx", r"^"), + ("lit", "ALTER OPERATOR FAMILY dump_test.op_family USING btree "), + ("lit", "OWNER TO "), + ("rx", r".+;"), + ], + XM, + ), + "like": {**_full(), **_dts(), "section_pre_data": 1}, + "unlike": { + "exclude_dump_test_schema": 1, + "no_owner": 1, + "only_dump_measurement": 1, + }, + } + tests["ALTER OPERATOR FAMILY dump_test.op_family USING btree"] = { + "create_order": 75, + "create_sql": "ALTER OPERATOR FAMILY dump_test.op_family USING btree ADD\n" + "\t\t\t\t\t\t OPERATOR 1 <(bigint,int4),\n" + "\t\t\t\t\t\t OPERATOR 2 <=(bigint,int4),\n" + "\t\t\t\t\t\t OPERATOR 3 =(bigint,int4),\n" + "\t\t\t\t\t\t OPERATOR 4 >=(bigint,int4),\n" + "\t\t\t\t\t\t OPERATOR 5 >(bigint,int4),\n" + "\t\t\t\t\t\t FUNCTION 1 (int4, int4) btint4cmp(int4,int4),\n" + "\t\t\t\t\t\t FUNCTION 2 (int4, int4) btint4sortsupport(internal),\n" + "\t\t\t\t\t\t FUNCTION 4 (int4, int4) btequalimage(oid);", + "regexp": _qr( + [ + ("rx", r"^"), + ("lit", "ALTER OPERATOR FAMILY dump_test.op_family USING btree ADD"), + ("rx", r"\n\s+"), + ("lit", "OPERATOR 1 <(bigint,integer) ,"), + ("rx", r"\n\s+"), + ("lit", "OPERATOR 2 <=(bigint,integer) ,"), + ("rx", r"\n\s+"), + ("lit", "OPERATOR 3 =(bigint,integer) ,"), + ("rx", r"\n\s+"), + ("lit", "OPERATOR 4 >=(bigint,integer) 
,"), + ("rx", r"\n\s+"), + ("lit", "OPERATOR 5 >(bigint,integer) ,"), + ("rx", r"\n\s+"), + ("lit", "FUNCTION 1 (integer, integer) btint4cmp(integer,integer) ,"), + ("rx", r"\n\s+"), + ("lit", "FUNCTION 2 (bigint, bigint) btint8sortsupport(internal) ,"), + ("rx", r"\n\s+"), + ("lit", "FUNCTION 2 (integer, integer) btint4sortsupport(internal) ,"), + ("rx", r"\n\s+"), + ("lit", "FUNCTION 4 (bigint, bigint) btequalimage(oid) ,"), + ("rx", r"\n\s+"), + ("lit", "FUNCTION 4 (integer, integer) btequalimage(oid);"), + ], + XM, + ), + "like": {**_full(), **_dts(), "section_pre_data": 1}, + "unlike": { + "exclude_dump_test_schema": 1, + "only_dump_measurement": 1, + }, + } + tests["ALTER OPERATOR CLASS dump_test.op_class OWNER TO"] = { + "regexp": _qr( + [ + ("rx", r"^"), + ("lit", "ALTER OPERATOR CLASS dump_test.op_class USING btree "), + ("lit", "OWNER TO "), + ("rx", r".+;"), + ], + XM, + ), + "like": {**_full(), **_dts(), "section_pre_data": 1}, + "unlike": { + "exclude_dump_test_schema": 1, + "no_owner": 1, + "only_dump_measurement": 1, + }, + } + tests["ALTER PUBLICATION pub1 OWNER TO"] = { + "regexp": re.compile(r"^ALTER PUBLICATION pub1 OWNER TO .+;", re.MULTILINE), + "like": {**_full(), "section_post_data": 1}, + "unlike": {"no_owner": 1}, + } + tests["ALTER LARGE OBJECT ... 
OWNER TO"] = { + "regexp": re.compile(r"^ALTER LARGE OBJECT \d+ OWNER TO .+;", re.MULTILINE), + "like": { + **_full(), + "column_inserts": 1, + "data_only": 1, + "inserts": 1, + "no_schema": 1, + "section_data": 1, + "test_schema_plus_large_objects": 1, + }, + "unlike": { + "binary_upgrade": 1, + "no_large_objects": 1, + "no_owner": 1, + "schema_only": 1, + "schema_only_with_statistics": 1, + }, + } + tests["ALTER PROCEDURAL LANGUAGE pltestlang OWNER TO"] = { + "regexp": re.compile( + r"^ALTER PROCEDURAL LANGUAGE pltestlang OWNER TO .+;", re.MULTILINE + ), + "like": {**_full(), "section_pre_data": 1}, + "unlike": {"no_owner": 1}, + } + tests["ALTER SCHEMA dump_test OWNER TO"] = { + "regexp": re.compile(r"^ALTER SCHEMA dump_test OWNER TO .+;", re.MULTILINE), + "like": {**_full(), **_dts(), "section_pre_data": 1}, + "unlike": { + "exclude_dump_test_schema": 1, + "no_owner": 1, + "only_dump_measurement": 1, + }, + } + tests["ALTER SCHEMA dump_test_second_schema OWNER TO"] = { + "regexp": re.compile( + r"^ALTER SCHEMA dump_test_second_schema OWNER TO .+;", re.MULTILINE + ), + "like": {**_full(), "role": 1, "section_pre_data": 1}, + "unlike": {"no_owner": 1}, + } + tests["ALTER SCHEMA public OWNER TO"] = { + "create_order": 15, + "create_sql": 'ALTER SCHEMA public OWNER TO "regress_quoted \\"" role";', + "regexp": re.compile(r"^ALTER SCHEMA public OWNER TO .+;", re.MULTILINE), + "like": {**_full(), "section_pre_data": 1}, + "unlike": {"no_owner": 1}, + } + tests["ALTER SCHEMA public OWNER TO (w/o ACL changes)"] = { + "database": "regress_public_owner", + "create_order": 100, + "create_sql": 'ALTER SCHEMA public OWNER TO "regress_quoted \\"" role";', + "regexp": re.compile(r"^(GRANT|REVOKE)", re.MULTILINE), + "like": {}, + } + tests["ALTER SEQUENCE test_table_col1_seq"] = { + "regexp": _qr( + [ + ("rx", r"^"), + ( + "lit", + "ALTER SEQUENCE dump_test.test_table_col1_seq OWNED BY dump_test.test_table.col1;", + ), + ], + XM, + ), + "like": { + **_full(), + **_dts(), + 
"only_dump_test_table": 1, + "section_pre_data": 1, + }, + "unlike": { + "exclude_dump_test_schema": 1, + "exclude_test_table": 1, + "only_dump_measurement": 1, + }, + } + tests["ALTER TABLE ONLY test_table ADD CONSTRAINT ... PRIMARY KEY"] = { + "regexp": _qr( + [ + ("rx", r"^"), + ("lit", "ALTER TABLE ONLY dump_test.test_table"), + ("rx", r" \n^\s+"), + ("lit", "ADD CONSTRAINT test_table_pkey PRIMARY KEY (col1);"), + ], + XM, + ), + "like": { + **_full(), + **_dts(), + "only_dump_test_table": 1, + "section_post_data": 1, + }, + "unlike": { + "exclude_dump_test_schema": 1, + "exclude_test_table": 1, + "only_dump_measurement": 1, + }, + } + + tests["CONSTRAINT NOT NULL / NOT VALID"] = { + "create_sql": "CREATE TABLE dump_test.test_table_nn (\n" + "\t\t\t\t\t\t\tcol1 int);\n" + "\t\t\t\t\t\t\tCREATE TABLE dump_test.test_table_nn_2 (\n" + "\t\t\t\t\t\t\tcol1 int NOT NULL);\n" + "\t\t\t\t\t\t\tCREATE TABLE dump_test.test_table_nn_chld1 (\n" + "\t\t\t\t\t\t\t) INHERITS (dump_test.test_table_nn);\n" + "\t\t\t\t\t\t\tCREATE TABLE dump_test.test_table_nn_chld2 (\n" + "\t\t\t\t\t\t\t\tcol1 int\n" + "\t\t\t\t\t\t\t) INHERITS (dump_test.test_table_nn);\n" + "\t\t\t\t\t\t\tCREATE TABLE dump_test.test_table_nn_chld3 (\n" + "\t\t\t\t\t\t\t) INHERITS (dump_test.test_table_nn, dump_test.test_table_nn_2);\n" + "\t\t\tALTER TABLE dump_test.test_table_nn ADD CONSTRAINT nn NOT NULL col1 NOT VALID;\n" + "\t\t\tALTER TABLE dump_test.test_table_nn_chld1 VALIDATE CONSTRAINT nn;\n" + "\t\t\tALTER TABLE dump_test.test_table_nn_chld2 VALIDATE CONSTRAINT nn;\n" + "\t\t\tCOMMENT ON CONSTRAINT nn ON dump_test.test_table_nn IS 'nn comment is valid';\n" + "\t\t\tCOMMENT ON CONSTRAINT nn ON dump_test.test_table_nn_chld2 IS 'nn_chld2 comment is valid';", + "regexp": _qr( + [ + ("rx", r"^"), + ("lit", "ALTER TABLE dump_test.test_table_nn"), + ("rx", r" \n^\s+"), + ("lit", "ADD CONSTRAINT nn NOT NULL col1 NOT VALID;"), + ], + XM, + ), + "like": {**_full(), **_dts(), "section_post_data": 1}, + 
"unlike": { + "exclude_dump_test_schema": 1, + "only_dump_measurement": 1, + }, + } + tests["COMMENT ON CONSTRAINT ON test_table_nn"] = { + "regexp": _qr( + [ + ("rx", r"^"), + ("lit", "COMMENT ON CONSTRAINT nn ON dump_test.test_table_nn IS"), + ], + XM, + ), + "like": {**_full(), **_dts(), "section_post_data": 1}, + "unlike": { + "exclude_dump_test_schema": 1, + "only_dump_measurement": 1, + }, + } + tests["COMMENT ON CONSTRAINT ON test_table_chld2"] = { + "regexp": _qr( + [ + ("rx", r"^"), + ( + "lit", + "COMMENT ON CONSTRAINT nn ON dump_test.test_table_nn_chld2 IS", + ), + ], + XM, + ), + "like": {**_full(), **_dts(), "section_pre_data": 1}, + "unlike": { + "exclude_dump_test_schema": 1, + "only_dump_measurement": 1, + }, + } + tests["CONSTRAINT NOT NULL / NOT VALID (child1)"] = { + "regexp": _qr( + [ + ("rx", r"^"), + ("lit", "CREATE TABLE dump_test.test_table_nn_chld1 ("), + ("rx", r"\n^\s+"), + ("lit", "CONSTRAINT nn NOT NULL col1"), + ("rx", r"$"), + ], + XM, + ), + "like": {**_full(), **_dts(), "section_pre_data": 1}, + "unlike": { + "exclude_dump_test_schema": 1, + "only_dump_measurement": 1, + "binary_upgrade": 1, + }, + } + tests["CONSTRAINT NOT NULL / NOT VALID (child2)"] = { + "regexp": _qr( + [ + ("rx", r"^"), + ("lit", "CREATE TABLE dump_test.test_table_nn_chld2 ("), + ("rx", r"\n^\s+"), + ("lit", "col1 integer CONSTRAINT nn NOT NULL"), + ("rx", r"$"), + ], + XM, + ), + "like": {**_full(), **_dts(), "section_pre_data": 1}, + "unlike": { + "exclude_dump_test_schema": 1, + "only_dump_measurement": 1, + }, + } + tests["CONSTRAINT NOT NULL / NOT VALID (child3)"] = { + "regexp": _qr( + [ + ("rx", r"^"), + ("lit", "CREATE TABLE dump_test.test_table_nn_chld3 ("), + ("rx", r"\n^"), + ("lit", ")"), + ("rx", r"$"), + ], + XM, + ), + "like": {**_full(), **_dts(), "section_pre_data": 1}, + "unlike": { + "exclude_dump_test_schema": 1, + "only_dump_measurement": 1, + "binary_upgrade": 1, + }, + } + tests["CONSTRAINT NOT NULL / NO INHERIT"] = { + "create_sql": 
"CREATE TABLE dump_test.test_table_nonn (\n" + "\t\tcol1 int NOT NULL NO INHERIT,\n" + "\t\tcol2 int);\n" + "\t\tCREATE TABLE dump_test.test_table_nonn_chld1 (\n" + "\t\t CONSTRAINT nn NOT NULL col2 NO INHERIT)\n" + "\t\tINHERITS (dump_test.test_table_nonn); ", + "regexp": _qr( + [ + ("rx", r"^"), + ("lit", "CREATE TABLE dump_test.test_table_nonn ("), + ("rx", r" \n^\s+"), + ("lit", "col1 integer NOT NULL NO INHERIT"), + ], + XM, + ), + "like": {**_full(), **_dts(), "section_pre_data": 1, "binary_upgrade": 1}, + "unlike": { + "exclude_dump_test_schema": 1, + "only_dump_measurement": 1, + }, + } + tests["CONSTRAINT NOT NULL / NO INHERIT (child1)"] = { + "regexp": _qr( + [ + ("rx", r"^"), + ("lit", "CREATE TABLE dump_test.test_table_nonn_chld1 ("), + ("rx", r" \n^\s+"), + ("lit", "CONSTRAINT nn NOT NULL col2 NO INHERIT"), + ], + XM, + ), + "like": {**_full(), **_dts(), "section_pre_data": 1}, + "unlike": { + "exclude_dump_test_schema": 1, + "only_dump_measurement": 1, + "binary_upgrade": 1, + }, + } + tests["CONSTRAINT PRIMARY KEY / WITHOUT OVERLAPS"] = { + "create_sql": "CREATE TABLE dump_test.test_table_tpk (\n" + "\t\t\t\t\t\t\tcol1 int4range,\n" + "\t\t\t\t\t\t\tcol2 tstzrange,\n" + "\t\t\t\t\t\t\tCONSTRAINT test_table_tpk_pkey PRIMARY KEY (col1, col2 WITHOUT OVERLAPS));", + "regexp": _qr( + [ + ("rx", r"^"), + ("lit", "ALTER TABLE ONLY dump_test.test_table_tpk"), + ("rx", r" \n^\s+"), + ( + "lit", + "ADD CONSTRAINT test_table_tpk_pkey PRIMARY KEY (col1, col2 WITHOUT OVERLAPS);", + ), + ], + XM, + ), + "like": {**_full(), **_dts(), "section_post_data": 1}, + "unlike": { + "exclude_dump_test_schema": 1, + "only_dump_measurement": 1, + }, + } + tests["CONSTRAINT UNIQUE / WITHOUT OVERLAPS"] = { + "create_sql": "CREATE TABLE dump_test.test_table_tuq (\n" + "\t\t\t\t\t\t\tcol1 int4range,\n" + "\t\t\t\t\t\t\tcol2 tstzrange,\n" + "\t\t\t\t\t\t\tCONSTRAINT test_table_tuq_uq UNIQUE (col1, col2 WITHOUT OVERLAPS));", + "regexp": _qr( + [ + ("rx", r"^"), + ("lit", "ALTER 
TABLE ONLY dump_test.test_table_tuq"), + ("rx", r" \n^\s+"), + ( + "lit", + "ADD CONSTRAINT test_table_tuq_uq UNIQUE (col1, col2 WITHOUT OVERLAPS);", + ), + ], + XM, + ), + "like": {**_full(), **_dts(), "section_post_data": 1}, + "unlike": { + "exclude_dump_test_schema": 1, + "only_dump_measurement": 1, + }, + } + tests["ALTER TABLE (partitioned) ADD CONSTRAINT ... FOREIGN KEY"] = { + "create_order": 4, + "create_sql": "CREATE TABLE dump_test.test_table_fk (\n" + "\t\t\t\t\t\t\tcol1 int references dump_test.test_table)\n" + "\t\t\t\t\t\t\tPARTITION BY RANGE (col1);\n" + "\t\t\t\t\t\t\tCREATE TABLE dump_test.test_table_fk_1\n" + "\t\t\t\t\t\t\tPARTITION OF dump_test.test_table_fk\n" + "\t\t\t\t\t\t\tFOR VALUES FROM (0) TO (10);", + "regexp": _qr( + [ + ( + "lit", + "ADD CONSTRAINT test_table_fk_col1_fkey FOREIGN KEY (col1) REFERENCES dump_test.test_table", + ), + ], + XM, + ), + "like": {**_full(), **_dts(), "section_post_data": 1}, + "unlike": { + "exclude_dump_test_schema": 1, + "only_dump_measurement": 1, + }, + } + + tests["ALTER TABLE ONLY test_table ALTER COLUMN col1 SET STATISTICS 90"] = { + "create_order": 93, + "create_sql": "ALTER TABLE dump_test.test_table ALTER COLUMN col1 SET STATISTICS 90;", + "regexp": _qr( + [ + ("rx", r"^"), + ( + "lit", + "ALTER TABLE ONLY dump_test.test_table ALTER COLUMN col1 SET STATISTICS 90;", + ), + ("rx", r"\n"), + ], + XM, + ), + "like": { + **_full(), + **_dts(), + "only_dump_test_table": 1, + "section_pre_data": 1, + }, + "unlike": { + "exclude_dump_test_schema": 1, + "exclude_test_table": 1, + "only_dump_measurement": 1, + }, + } + tests["ALTER TABLE ONLY test_table ALTER COLUMN col2 SET STORAGE"] = { + "create_order": 94, + "create_sql": "ALTER TABLE dump_test.test_table ALTER COLUMN col2 SET STORAGE EXTERNAL;", + "regexp": _qr( + [ + ("rx", r"^"), + ( + "lit", + "ALTER TABLE ONLY dump_test.test_table ALTER COLUMN col2 SET STORAGE EXTERNAL;", + ), + ("rx", r"\n"), + ], + XM, + ), + "like": { + **_full(), + **_dts(), + 
"only_dump_test_table": 1, + "section_pre_data": 1, + }, + "unlike": { + "exclude_dump_test_schema": 1, + "exclude_test_table": 1, + "only_dump_measurement": 1, + }, + } + tests["ALTER TABLE ONLY test_table ALTER COLUMN col3 SET STORAGE"] = { + "create_order": 95, + "create_sql": "ALTER TABLE dump_test.test_table ALTER COLUMN col3 SET STORAGE MAIN;", + "regexp": _qr( + [ + ("rx", r"^"), + ( + "lit", + "ALTER TABLE ONLY dump_test.test_table ALTER COLUMN col3 SET STORAGE MAIN;", + ), + ("rx", r"\n"), + ], + XM, + ), + "like": { + **_full(), + **_dts(), + "only_dump_test_table": 1, + "section_pre_data": 1, + }, + "unlike": { + "exclude_dump_test_schema": 1, + "exclude_test_table": 1, + "only_dump_measurement": 1, + }, + } + tests["ALTER TABLE ONLY test_table ALTER COLUMN col4 SET n_distinct"] = { + "create_order": 95, + "create_sql": "ALTER TABLE dump_test.test_table ALTER COLUMN col4 SET (n_distinct = 10);", + "regexp": _qr( + [ + ("rx", r"^"), + ( + "lit", + "ALTER TABLE ONLY dump_test.test_table ALTER COLUMN col4 SET (n_distinct=10);", + ), + ("rx", r"\n"), + ], + XM, + ), + "like": { + **_full(), + **_dts(), + "only_dump_test_table": 1, + "section_pre_data": 1, + }, + "unlike": { + "exclude_dump_test_schema": 1, + "exclude_test_table": 1, + "only_dump_measurement": 1, + }, + } + tests[ + "ALTER TABLE ONLY dump_test.measurement ATTACH PARTITION measurement_y2006m2" + ] = { + "regexp": _qr( + [ + ("rx", r"^"), + ( + "lit", + "ALTER TABLE ONLY dump_test.measurement ATTACH PARTITION dump_test_second_schema.measurement_y2006m2 ", + ), + ("lit", "FOR VALUES FROM ('2006-02-01') TO ('2006-03-01');"), + ("rx", r"\n"), + ], + XM, + ), + "like": { + **_full(), + "role": 1, + "section_pre_data": 1, + "binary_upgrade": 1, + "only_dump_measurement": 1, + }, + "unlike": {"exclude_measurement": 1}, + } + tests["ALTER TABLE test_table CLUSTER ON test_table_pkey"] = { + "create_order": 96, + "create_sql": "ALTER TABLE dump_test.test_table CLUSTER ON test_table_pkey", + "regexp": 
_qr( + [ + ("rx", r"^"), + ("lit", "ALTER TABLE dump_test.test_table CLUSTER ON test_table_pkey;"), + ("rx", r"\n"), + ], + XM, + ), + "like": { + **_full(), + **_dts(), + "only_dump_test_table": 1, + "section_post_data": 1, + }, + "unlike": { + "exclude_dump_test_schema": 1, + "exclude_test_table": 1, + "only_dump_measurement": 1, + }, + } + tests["ALTER TABLE test_table DISABLE TRIGGER ALL"] = { + "regexp": _qr( + [ + ("rx", r"^"), + ("lit", "SET SESSION AUTHORIZATION 'test_superuser';"), + ("rx", r"\n\n"), + ("lit", "ALTER TABLE dump_test.test_table DISABLE TRIGGER ALL;"), + ("rx", r"\n\n"), + ( + "lit", + "COPY dump_test.test_table (col1, col2, col3, col4) FROM stdin;", + ), + ("rx", r"\n(?:\d\t\\N\t\\N\t\\N\n){9}\\\.\n\n\n"), + ("lit", "ALTER TABLE dump_test.test_table ENABLE TRIGGER ALL;"), + ], + XM, + ), + "like": {"data_only": 1}, + } + tests["ALTER FOREIGN TABLE foreign_table ALTER COLUMN c1 OPTIONS"] = { + "regexp": _qr( + [ + ("rx", r"^"), + ( + "lit", + "ALTER FOREIGN TABLE ONLY dump_test.foreign_table ALTER COLUMN c1 OPTIONS (", + ), + ("rx", r"\n\s+"), + ("lit", "column_name 'col1'"), + ("rx", r"\n"), + ("lit", ");"), + ("rx", r"\n"), + ], + XM, + ), + "like": {**_full(), **_dts(), "section_pre_data": 1}, + "unlike": { + "exclude_dump_test_schema": 1, + "only_dump_measurement": 1, + }, + } + tests["ALTER TABLE test_table OWNER TO"] = { + "regexp": _qr( + [ + ("rx", r"^"), + ("lit", "ALTER TABLE dump_test.test_table OWNER TO "), + ("rx", r".+;"), + ], + re.MULTILINE, + ), + "like": { + **_full(), + **_dts(), + "only_dump_test_table": 1, + "section_pre_data": 1, + }, + "unlike": { + "exclude_dump_test_schema": 1, + "exclude_test_table": 1, + "only_dump_measurement": 1, + "no_owner": 1, + }, + } + tests["ALTER TABLE test_table ENABLE ROW LEVEL SECURITY"] = { + "create_order": 23, + "create_sql": "ALTER TABLE dump_test.test_table\n" + "\t\t\t\t\t ENABLE ROW LEVEL SECURITY;", + "regexp": _qr( + [ + ("rx", r"^"), + ("lit", "ALTER TABLE dump_test.test_table 
ENABLE ROW LEVEL SECURITY;"), + ], + re.MULTILINE, + ), + "like": { + **_full(), + **_dts(), + "only_dump_test_table": 1, + "section_post_data": 1, + }, + "unlike": { + "exclude_dump_test_schema": 1, + "exclude_test_table": 1, + "no_policies": 1, + "no_policies_restore": 1, + "only_dump_measurement": 1, + }, + } + tests["ALTER TABLE test_second_table OWNER TO"] = { + "regexp": _qr( + [ + ("rx", r"^"), + ("lit", "ALTER TABLE dump_test.test_second_table OWNER TO "), + ("rx", r".+;"), + ], + re.MULTILINE, + ), + "like": {**_full(), **_dts(), "section_pre_data": 1}, + "unlike": { + "exclude_dump_test_schema": 1, + "no_owner": 1, + "only_dump_measurement": 1, + }, + } + tests["ALTER TABLE measurement OWNER TO"] = { + "regexp": _qr( + [ + ("rx", r"^"), + ("lit", "ALTER TABLE dump_test.measurement OWNER TO "), + ("rx", r".+;"), + ], + re.MULTILINE, + ), + "like": { + **_full(), + **_dts(), + "section_pre_data": 1, + "only_dump_measurement": 1, + }, + "unlike": { + "exclude_dump_test_schema": 1, + "no_owner": 1, + "exclude_measurement": 1, + }, + } + tests["ALTER TABLE measurement_y2006m2 OWNER TO"] = { + "regexp": _qr( + [ + ("rx", r"^"), + ( + "lit", + "ALTER TABLE dump_test_second_schema.measurement_y2006m2 OWNER TO ", + ), + ("rx", r".+;"), + ], + re.MULTILINE, + ), + "like": { + **_full(), + "role": 1, + "section_pre_data": 1, + "only_dump_measurement": 1, + }, + "unlike": { + "no_owner": 1, + "exclude_measurement": 1, + }, + } + tests["ALTER FOREIGN TABLE foreign_table OWNER TO"] = { + "regexp": _qr( + [ + ("rx", r"^"), + ("lit", "ALTER FOREIGN TABLE dump_test.foreign_table OWNER TO "), + ("rx", r".+;"), + ], + re.MULTILINE, + ), + "like": {**_full(), **_dts(), "section_pre_data": 1}, + "unlike": { + "exclude_dump_test_schema": 1, + "no_owner": 1, + "only_dump_measurement": 1, + }, + } + tests["ALTER TEXT SEARCH CONFIGURATION alt_ts_conf1 OWNER TO"] = { + "regexp": _qr( + [ + ("rx", r"^"), + ( + "lit", + "ALTER TEXT SEARCH CONFIGURATION dump_test.alt_ts_conf1 OWNER 
TO ", + ), + ("rx", r".+;"), + ], + re.MULTILINE, + ), + "like": {**_full(), **_dts(), "section_pre_data": 1}, + "unlike": { + "exclude_dump_test_schema": 1, + "no_owner": 1, + "only_dump_measurement": 1, + }, + } + tests["ALTER TEXT SEARCH DICTIONARY alt_ts_dict1 OWNER TO"] = { + "regexp": _qr( + [ + ("rx", r"^"), + ( + "lit", + "ALTER TEXT SEARCH DICTIONARY dump_test.alt_ts_dict1 OWNER TO ", + ), + ("rx", r".+;"), + ], + re.MULTILINE, + ), + "like": {**_full(), **_dts(), "section_pre_data": 1}, + "unlike": { + "exclude_dump_test_schema": 1, + "no_owner": 1, + "only_dump_measurement": 1, + }, + } + + tests["LO create (using lo_from_bytea)"] = { + "create_order": 50, + "create_sql": "SELECT pg_catalog.lo_from_bytea(0, '\\x310a320a330a340a350a360a370a380a390a');", + "regexp": re.compile(r"^SELECT pg_catalog\.lo_create\('\d+'\);", re.MULTILINE), + "like": { + **_full(), + "column_inserts": 1, + "data_only": 1, + "inserts": 1, + "no_schema": 1, + "section_data": 1, + "test_schema_plus_large_objects": 1, + }, + "unlike": { + "binary_upgrade": 1, + "schema_only": 1, + "schema_only_with_statistics": 1, + "no_large_objects": 1, + }, + } + tests["LO load (using lo_from_bytea)"] = { + "regexp": _qr( + [ + ("rx", r"^"), + ("lit", "SELECT pg_catalog.lo_open"), + ("rx", r" \('\d+',\ \d+\);\n"), + ("lit", "SELECT pg_catalog.lowrite(0, "), + ("lit", "'\\x310a320a330a340a350a360a370a380a390a');"), + ("rx", r"\n"), + ("lit", "SELECT pg_catalog.lo_close(0);"), + ], + XM, + ), + "like": { + **_full(), + "column_inserts": 1, + "data_only": 1, + "inserts": 1, + "no_schema": 1, + "section_data": 1, + "test_schema_plus_large_objects": 1, + }, + "unlike": { + "binary_upgrade": 1, + "no_large_objects": 1, + "schema_only": 1, + "schema_only_with_statistics": 1, + }, + } + tests["LO create (with no data)"] = { + "create_sql": "SELECT pg_catalog.lo_create(0);", + "regexp": _qr( + [ + ("rx", r"^"), + ("lit", "SELECT pg_catalog.lo_open"), + ("rx", r" \('\d+',\ \d+\);\n"), + ("lit", "SELECT 
pg_catalog.lo_close(0);"), + ], + XM, + ), + "like": { + **_full(), + "column_inserts": 1, + "data_only": 1, + "inserts": 1, + "no_schema": 1, + "section_data": 1, + "test_schema_plus_large_objects": 1, + }, + "unlike": { + "binary_upgrade": 1, + "no_large_objects": 1, + "schema_only": 1, + "schema_only_with_statistics": 1, + }, + } + tests["COMMENT ON DATABASE postgres"] = { + "regexp": re.compile(r"^COMMENT ON DATABASE postgres IS .+;", re.MULTILINE), + "like": {"createdb": 1}, + } + tests["COMMENT ON EXTENSION plpgsql"] = { + "regexp": re.compile(r"^COMMENT ON EXTENSION plpgsql IS .+;", re.MULTILINE), + "like": {}, + } + tests["COMMENT ON SCHEMA public"] = { + "regexp": re.compile(r"^COMMENT ON SCHEMA public IS .+;", re.MULTILINE), + "like": { + "pg_dumpall_dbprivs": 1, + "pg_dumpall_exclude": 1, + }, + } + tests["COMMENT ON SCHEMA public IS NULL"] = { + "database": "regress_public_owner", + "create_order": 100, + "create_sql": "COMMENT ON SCHEMA public IS NULL;", + "regexp": re.compile(r"^COMMENT ON SCHEMA public IS '';", re.MULTILINE), + "like": {"defaults_public_owner": 1}, + } + tests["COMMENT ON TABLE dump_test.test_table"] = { + "create_order": 36, + "create_sql": "COMMENT ON TABLE dump_test.test_table\n" + "\t\t\t\t\t IS 'comment on table';", + "regexp": _qr( + [ + ("rx", r"^"), + ("lit", "COMMENT ON TABLE dump_test.test_table IS 'comment on table';"), + ], + re.MULTILINE, + ), + "like": { + **_full(), + **_dts(), + "only_dump_test_table": 1, + "section_pre_data": 1, + }, + "unlike": { + "exclude_dump_test_schema": 1, + "exclude_test_table": 1, + "only_dump_measurement": 1, + }, + } + tests["COMMENT ON COLUMN dump_test.test_table.col1"] = { + "create_order": 36, + "create_sql": "COMMENT ON COLUMN dump_test.test_table.col1\n" + "\t\t\t\t\t IS 'comment on column';", + "regexp": _qr( + [ + ("rx", r"^"), + ( + "lit", + "COMMENT ON COLUMN dump_test.test_table.col1 IS 'comment on column';", + ), + ], + XM, + ), + "like": { + **_full(), + **_dts(), + 
"only_dump_test_table": 1, + "section_pre_data": 1, + }, + "unlike": { + "exclude_dump_test_schema": 1, + "exclude_test_table": 1, + "only_dump_measurement": 1, + }, + } + tests["COMMENT ON COLUMN dump_test.composite.f1"] = { + "create_order": 44, + "create_sql": "COMMENT ON COLUMN dump_test.composite.f1\n" + "\t\t\t\t\t IS 'comment on column of type';", + "regexp": _qr( + [ + ("rx", r"^"), + ( + "lit", + "COMMENT ON COLUMN dump_test.composite.f1 IS 'comment on column of type';", + ), + ], + XM, + ), + "like": {**_full(), **_dts(), "section_pre_data": 1}, + "unlike": { + "exclude_dump_test_schema": 1, + "only_dump_measurement": 1, + }, + } + tests["COMMENT ON COLUMN dump_test.test_second_table.col1"] = { + "create_order": 63, + "create_sql": "COMMENT ON COLUMN dump_test.test_second_table.col1\n" + "\t\t\t\t\t IS 'comment on column col1';", + "regexp": _qr( + [ + ("rx", r"^"), + ( + "lit", + "COMMENT ON COLUMN dump_test.test_second_table.col1 IS 'comment on column col1';", + ), + ], + XM, + ), + "like": {**_full(), **_dts(), "section_pre_data": 1}, + "unlike": { + "exclude_dump_test_schema": 1, + "only_dump_measurement": 1, + }, + } + tests["COMMENT ON COLUMN dump_test.test_second_table.col2"] = { + "create_order": 64, + "create_sql": "COMMENT ON COLUMN dump_test.test_second_table.col2\n" + "\t\t\t\t\t IS 'comment on column col2';", + "regexp": _qr( + [ + ("rx", r"^"), + ( + "lit", + "COMMENT ON COLUMN dump_test.test_second_table.col2 IS 'comment on column col2';", + ), + ], + XM, + ), + "like": {**_full(), **_dts(), "section_pre_data": 1}, + "unlike": { + "exclude_dump_test_schema": 1, + "only_dump_measurement": 1, + }, + } + tests["COMMENT ON CONVERSION dump_test.test_conversion"] = { + "create_order": 79, + "create_sql": "COMMENT ON CONVERSION dump_test.test_conversion\n" + "\t\t\t\t\t IS 'comment on test conversion';", + "regexp": _qr( + [ + ("rx", r"^"), + ( + "lit", + "COMMENT ON CONVERSION dump_test.test_conversion IS 'comment on test conversion';", + ), + ], 
+ re.MULTILINE, + ), + "like": {**_full(), **_dts(), "section_pre_data": 1}, + "unlike": { + "exclude_dump_test_schema": 1, + "only_dump_measurement": 1, + }, + } + tests["COMMENT ON COLLATION test0"] = { + "create_order": 77, + "create_sql": "COMMENT ON COLLATION test0\n" + "\t\t\t\t\t IS 'comment on test0 collation';", + "regexp": _qr( + [ + ("rx", r"^"), + ( + "lit", + "COMMENT ON COLLATION public.test0 IS 'comment on test0 collation';", + ), + ], + re.MULTILINE, + ), + "collation": 1, + "like": {**_full(), "section_pre_data": 1}, + } + tests["COMMENT ON LARGE OBJECT ..."] = { + "create_order": 65, + "create_sql": "DO $$\n" + "\t\t\t\t\t\t DECLARE myoid oid;\n" + "\t\t\t\t\t\t BEGIN\n" + "\t\t\t\t\t\t\tSELECT loid FROM pg_largeobject INTO myoid;\n" + "\t\t\t\t\t\t\tEXECUTE 'COMMENT ON LARGE OBJECT ' || myoid || ' IS ''comment on large object'';';\n" + "\t\t\t\t\t\t END;\n" + "\t\t\t\t\t\t $$;", + "regexp": _qr( + [ + ("rx", r"^"), + ("lit", "COMMENT ON LARGE OBJECT "), + ("rx", r"[0-9]+"), + ("lit", " IS 'comment on large object';"), + ], + XM, + ), + "like": { + **_full(), + "column_inserts": 1, + "data_only": 1, + "inserts": 1, + "no_schema": 1, + "section_data": 1, + "test_schema_plus_large_objects": 1, + }, + "unlike": { + "no_large_objects": 1, + "schema_only": 1, + "schema_only_with_statistics": 1, + }, + } + tests["COMMENT ON POLICY p1"] = { + "create_order": 55, + "create_sql": "COMMENT ON POLICY p1 ON dump_test.test_table\n" + "\t\t\t\t\t IS 'comment on policy';", + "regexp": re.compile( + r"^COMMENT ON POLICY p1 ON dump_test.test_table IS 'comment on policy';", + re.MULTILINE, + ), + "like": { + **_full(), + **_dts(), + "only_dump_test_table": 1, + "section_post_data": 1, + }, + "unlike": { + "exclude_dump_test_schema": 1, + "exclude_test_table": 1, + "no_policies": 1, + "no_policies_restore": 1, + "only_dump_measurement": 1, + }, + } + tests["COMMENT ON PUBLICATION pub1"] = { + "create_order": 55, + "create_sql": "COMMENT ON PUBLICATION pub1\n" + 
"\t\t\t\t\t IS 'comment on publication';", + "regexp": re.compile( + r"^COMMENT ON PUBLICATION pub1 IS 'comment on publication';", re.MULTILINE + ), + "like": {**_full(), "section_post_data": 1}, + } + tests["COMMENT ON SUBSCRIPTION sub1"] = { + "create_order": 55, + "create_sql": "COMMENT ON SUBSCRIPTION sub1\n" + "\t\t\t\t\t IS 'comment on subscription';", + "regexp": re.compile( + r"^COMMENT ON SUBSCRIPTION sub1 IS 'comment on subscription';", re.MULTILINE + ), + "like": {**_full(), "section_post_data": 1}, + "unlike": { + "no_subscriptions": 1, + "no_subscriptions_restore": 1, + }, + } + + tests["COMMENT ON TEXT SEARCH CONFIGURATION dump_test.alt_ts_conf1"] = { + "create_order": 84, + "create_sql": "COMMENT ON TEXT SEARCH CONFIGURATION dump_test.alt_ts_conf1\n" + "\t\t\t\t\t IS 'comment on text search configuration';", + "regexp": _qr( + [ + ("rx", r"^"), + ( + "lit", + "COMMENT ON TEXT SEARCH CONFIGURATION dump_test.alt_ts_conf1 IS 'comment on text search configuration';", + ), + ], + re.MULTILINE, + ), + "like": {**_full(), **_dts(), "section_pre_data": 1}, + "unlike": { + "exclude_dump_test_schema": 1, + "only_dump_measurement": 1, + }, + } + tests["COMMENT ON TEXT SEARCH DICTIONARY dump_test.alt_ts_dict1"] = { + "create_order": 84, + "create_sql": "COMMENT ON TEXT SEARCH DICTIONARY dump_test.alt_ts_dict1\n" + "\t\t\t\t\t IS 'comment on text search dictionary';", + "regexp": _qr( + [ + ("rx", r"^"), + ( + "lit", + "COMMENT ON TEXT SEARCH DICTIONARY dump_test.alt_ts_dict1 IS 'comment on text search dictionary';", + ), + ], + re.MULTILINE, + ), + "like": {**_full(), **_dts(), "section_pre_data": 1}, + "unlike": { + "exclude_dump_test_schema": 1, + "only_dump_measurement": 1, + }, + } + tests["COMMENT ON TEXT SEARCH PARSER dump_test.alt_ts_prs1"] = { + "create_order": 84, + "create_sql": "COMMENT ON TEXT SEARCH PARSER dump_test.alt_ts_prs1\n" + "\t\t\t\t\t IS 'comment on text search parser';", + "regexp": _qr( + [ + ("rx", r"^"), + ( + "lit", + "COMMENT ON TEXT 
SEARCH PARSER dump_test.alt_ts_prs1 IS 'comment on text search parser';", + ), + ], + re.MULTILINE, + ), + "like": {**_full(), **_dts(), "section_pre_data": 1}, + "unlike": { + "exclude_dump_test_schema": 1, + "only_dump_measurement": 1, + }, + } + tests["COMMENT ON TEXT SEARCH TEMPLATE dump_test.alt_ts_temp1"] = { + "create_order": 84, + "create_sql": "COMMENT ON TEXT SEARCH TEMPLATE dump_test.alt_ts_temp1\n" + "\t\t\t\t\t IS 'comment on text search template';", + "regexp": _qr( + [ + ("rx", r"^"), + ( + "lit", + "COMMENT ON TEXT SEARCH TEMPLATE dump_test.alt_ts_temp1 IS 'comment on text search template';", + ), + ], + re.MULTILINE, + ), + "like": {**_full(), **_dts(), "section_pre_data": 1}, + "unlike": { + "exclude_dump_test_schema": 1, + "only_dump_measurement": 1, + }, + } + tests["COMMENT ON TYPE dump_test.planets - ENUM"] = { + "create_order": 68, + "create_sql": "COMMENT ON TYPE dump_test.planets\n" + "\t\t\t\t\t IS 'comment on enum type';", + "regexp": _qr( + [ + ("rx", r"^"), + ("lit", "COMMENT ON TYPE dump_test.planets IS 'comment on enum type';"), + ], + re.MULTILINE, + ), + "like": {**_full(), **_dts(), "section_pre_data": 1}, + "unlike": { + "exclude_dump_test_schema": 1, + "only_dump_measurement": 1, + }, + } + tests["COMMENT ON TYPE dump_test.textrange - RANGE"] = { + "create_order": 69, + "create_sql": "COMMENT ON TYPE dump_test.textrange\n" + "\t\t\t\t\t IS 'comment on range type';", + "regexp": _qr( + [ + ("rx", r"^"), + ( + "lit", + "COMMENT ON TYPE dump_test.textrange IS 'comment on range type';", + ), + ], + re.MULTILINE, + ), + "like": {**_full(), **_dts(), "section_pre_data": 1}, + "unlike": { + "exclude_dump_test_schema": 1, + "only_dump_measurement": 1, + }, + } + tests["COMMENT ON TYPE dump_test.int42 - Regular"] = { + "create_order": 70, + "create_sql": "COMMENT ON TYPE dump_test.int42\n" + "\t\t\t\t\t IS 'comment on regular type';", + "regexp": _qr( + [ + ("rx", r"^"), + ( + "lit", + "COMMENT ON TYPE dump_test.int42 IS 'comment on 
regular type';", + ), + ], + re.MULTILINE, + ), + "like": {**_full(), **_dts(), "section_pre_data": 1}, + "unlike": { + "exclude_dump_test_schema": 1, + "only_dump_measurement": 1, + }, + } + tests["COMMENT ON TYPE dump_test.undefined - Undefined"] = { + "create_order": 71, + "create_sql": "COMMENT ON TYPE dump_test.undefined\n" + "\t\t\t\t\t IS 'comment on undefined type';", + "regexp": _qr( + [ + ("rx", r"^"), + ( + "lit", + "COMMENT ON TYPE dump_test.undefined IS 'comment on undefined type';", + ), + ], + re.MULTILINE, + ), + "like": {**_full(), **_dts(), "section_pre_data": 1}, + "unlike": { + "exclude_dump_test_schema": 1, + "only_dump_measurement": 1, + }, + } + tests["COPY test_table"] = { + "create_order": 4, + "create_sql": "INSERT INTO dump_test.test_table (col1) " + "SELECT generate_series FROM generate_series(1,9);", + "regexp": _qr( + [ + ("rx", r"^"), + ( + "lit", + "COPY dump_test.test_table (col1, col2, col3, col4) FROM stdin;", + ), + ("rx", r"\n(?:\d\t\\N\t\\N\t\\N\n){9}\\\.\n"), + ], + XM, + ), + "like": { + **_full(), + **_dts(), + "data_only": 1, + "no_schema": 1, + "only_dump_test_table": 1, + "section_data": 1, + }, + "unlike": { + "binary_upgrade": 1, + "exclude_dump_test_schema": 1, + "exclude_test_table": 1, + "exclude_test_table_data": 1, + "schema_only": 1, + "schema_only_with_statistics": 1, + "only_dump_measurement": 1, + }, + } + tests["COPY fk_reference_test_table"] = { + "create_order": 22, + "create_sql": "INSERT INTO dump_test.fk_reference_test_table (col1) " + "SELECT generate_series FROM generate_series(1,5);", + "regexp": _qr( + [ + ("rx", r"^"), + ("lit", "COPY dump_test.fk_reference_test_table (col1) FROM stdin;"), + ("rx", r"\n(?:\d\n){5}\\\.\n"), + ], + XM, + ), + "like": { + **_full(), + **_dts(), + "data_only": 1, + "exclude_test_table": 1, + "exclude_test_table_data": 1, + "no_schema": 1, + "section_data": 1, + }, + "unlike": { + "binary_upgrade": 1, + "exclude_dump_test_schema": 1, + "schema_only": 1, + 
"schema_only_with_statistics": 1, + "only_dump_measurement": 1, + }, + } + tests["COPY fk_reference_test_table second"] = { + "regexp": _qr( + [ + ("rx", r"^"), + ( + "lit", + "COPY dump_test.test_table (col1, col2, col3, col4) FROM stdin;", + ), + ("rx", r"\n(?:\d\t\\N\t\\N\t\\N\n){9}\\\.\n.*"), + ("lit", "COPY dump_test.fk_reference_test_table (col1) FROM stdin;"), + ("rx", r"\n(?:\d\n){5}\\\.\n"), + ], + XMS, + ), + "like": { + "data_only": 1, + "no_schema": 1, + }, + } + + tests["COPY test_second_table"] = { + "create_order": 7, + "create_sql": "INSERT INTO dump_test.test_second_table (col1, col2) " + "SELECT generate_series, generate_series::text " + "FROM generate_series(1,9);", + "regexp": _qr( + [ + ("rx", r"^"), + ("lit", "COPY dump_test.test_second_table (col1, col2) FROM stdin;"), + ("rx", r"\n(?:\d\t\d\n){9}\\\.\n"), + ], + XM, + ), + "like": { + **_full(), + **_dts(), + "data_only": 1, + "no_schema": 1, + "section_data": 1, + }, + "unlike": { + "binary_upgrade": 1, + "exclude_dump_test_schema": 1, + "schema_only": 1, + "schema_only_with_statistics": 1, + "only_dump_measurement": 1, + }, + } + tests["COPY test_third_table"] = { + "create_order": 7, + "create_sql": "INSERT INTO dump_test.test_third_table VALUES (123, DEFAULT, 456);", + "regexp": _qr( + [ + ("rx", r"^"), + ("lit", 'COPY dump_test.test_third_table (f1, "F3") FROM stdin;'), + ("rx", r"\n123\t456\n\\\.\n"), + ], + XM, + ), + "like": { + **_full(), + **_dts(), + "data_only": 1, + "no_schema": 1, + "section_data": 1, + }, + "unlike": { + "binary_upgrade": 1, + "exclude_dump_test_schema": 1, + "schema_only": 1, + "schema_only_with_statistics": 1, + "only_dump_measurement": 1, + }, + } + tests["COPY test_fourth_table"] = { + "create_order": 7, + "create_sql": "INSERT INTO dump_test.test_fourth_table DEFAULT VALUES;" + "INSERT INTO dump_test.test_fourth_table DEFAULT VALUES;", + "regexp": _qr( + [ + ("rx", r"^"), + ("lit", "COPY dump_test.test_fourth_table FROM stdin;"), + ("rx", 
r"\n\n\n\\\.\n"), + ], + XM, + ), + "like": { + **_full(), + **_dts(), + "data_only": 1, + "no_schema": 1, + "section_data": 1, + }, + "unlike": { + "binary_upgrade": 1, + "exclude_dump_test_schema": 1, + "schema_only": 1, + "schema_only_with_statistics": 1, + "only_dump_measurement": 1, + }, + } + tests["COPY test_fifth_table"] = { + "create_order": 54, + "create_sql": "INSERT INTO dump_test.test_fifth_table VALUES (NULL, true, false, '11001'::bit(5), 'NaN');", + "regexp": _qr( + [ + ("rx", r"^"), + ( + "lit", + "COPY dump_test.test_fifth_table (col1, col2, col3, col4, col5) FROM stdin;", + ), + ("rx", r"\n\\N\tt\tf\t11001\tNaN\n\\\.\n"), + ], + XM, + ), + "like": { + **_full(), + **_dts(), + "data_only": 1, + "no_schema": 1, + "section_data": 1, + }, + "unlike": { + "binary_upgrade": 1, + "exclude_dump_test_schema": 1, + "schema_only": 1, + "schema_only_with_statistics": 1, + "only_dump_measurement": 1, + }, + } + tests["COPY test_table_identity"] = { + "create_order": 54, + "create_sql": "INSERT INTO dump_test.test_table_identity (col2) VALUES ('test');", + "regexp": _qr( + [ + ("rx", r"^"), + ("lit", "COPY dump_test.test_table_identity (col1, col2) FROM stdin;"), + ("rx", r"\n1\ttest\n\\\.\n"), + ], + XM, + ), + "like": { + **_full(), + **_dts(), + "data_only": 1, + "no_schema": 1, + "section_data": 1, + }, + "unlike": { + "binary_upgrade": 1, + "exclude_dump_test_schema": 1, + "schema_only": 1, + "schema_only_with_statistics": 1, + "only_dump_measurement": 1, + }, + } + tests["INSERT INTO test_table"] = { + "regexp": _qr( + [ + ("rx", r"^"), + ( + "rx", + r"(?:INSERT\ INTO\ dump_test\.test_table\ \(col1,\ col2,\ col3,\ col4\)\ VALUES\ \(\d,\ NULL,\ NULL,\ NULL\);\n){9}", + ), + ], + XM, + ), + "like": {"column_inserts": 1}, + } + tests["test_table with 4-row INSERTs"] = { + "regexp": _qr( + [ + ("rx", r"^"), + ( + "rx", + r"(?:" + r"INSERT\ INTO\ dump_test\.test_table\ VALUES\n" + r"(?:\t\(\d,\ NULL,\ NULL,\ NULL\),\n){3}" + r"\t\(\d,\ NULL,\ NULL,\ NULL\);\n" 
+ r"){2}" + r"INSERT\ INTO\ dump_test\.test_table\ VALUES\n" + r"\t\(\d,\ NULL,\ NULL,\ NULL\);", + ), + ], + XM, + ), + "like": {"rows_per_insert": 1}, + } + tests["INSERT INTO test_second_table"] = { + "regexp": _qr( + [ + ("rx", r"^"), + ( + "rx", + r"(?:INSERT\ INTO\ dump_test\.test_second_table\ \(col1,\ col2\)\ VALUES\ \(\d,\ '\d'\);\n){9}", + ), + ], + XM, + ), + "like": {"column_inserts": 1}, + } + tests["INSERT INTO test_third_table (colnames)"] = { + "regexp": re.compile( + r'^INSERT INTO dump_test\.test_third_table \(f1, "F3"\) VALUES \(123, 456\);\n', + re.MULTILINE, + ), + "like": {"column_inserts": 1}, + } + tests["INSERT INTO test_third_table"] = { + "regexp": re.compile( + r"^INSERT INTO dump_test\.test_third_table VALUES \(123, DEFAULT, 456, DEFAULT\);\n", + re.MULTILINE, + ), + "like": {"inserts": 1}, + } + tests["INSERT INTO test_fourth_table"] = { + "regexp": re.compile( + r"^(?:INSERT INTO dump_test\.test_fourth_table DEFAULT VALUES;\n){2}", + re.MULTILINE, + ), + "like": {"column_inserts": 1, "inserts": 1, "rows_per_insert": 1}, + } + tests["INSERT INTO test_fifth_table"] = { + "regexp": _qr( + [ + ("rx", r"^"), + ( + "lit", + "INSERT INTO dump_test.test_fifth_table (col1, col2, col3, col4, col5) VALUES (NULL, true, false, B'11001', 'NaN');", + ), + ], + re.MULTILINE, + ), + "like": {"column_inserts": 1}, + } + tests["INSERT INTO test_table_identity"] = { + "regexp": _qr( + [ + ("rx", r"^"), + ( + "lit", + "INSERT INTO dump_test.test_table_identity (col1, col2) OVERRIDING SYSTEM VALUE VALUES (1, 'test');", + ), + ], + re.MULTILINE, + ), + "like": {"column_inserts": 1}, + } + + tests["CREATE ROLE regress_dump_test_role"] = { + "create_order": 1, + "create_sql": "CREATE ROLE regress_dump_test_role;", + "regexp": re.compile(r"^CREATE ROLE regress_dump_test_role;", re.MULTILINE), + "like": { + "pg_dumpall_dbprivs": 1, + "pg_dumpall_exclude": 1, + "pg_dumpall_globals": 1, + "pg_dumpall_globals_clean": 1, + }, + } + tests["CREATE ROLE 
regress_quoted..."] = { + "create_order": 1, + "create_sql": 'CREATE ROLE "regress_quoted \\"" role";', + "regexp": re.compile( + r'^CREATE ROLE "regress_quoted \\"" role";', re.MULTILINE + ), + "like": { + "pg_dumpall_dbprivs": 1, + "pg_dumpall_exclude": 1, + "pg_dumpall_globals": 1, + "pg_dumpall_globals_clean": 1, + }, + } + tests["newline of table name in comment"] = { + "create_sql": '-- meet getPartitioningInfo() "unsafe" condition\n' + "\t\t\t\t\t\t CREATE TYPE pp_colors AS\n" + "\t\t\t\t\t\t\tENUM ('green', 'blue', 'black');\n" + "\t\t\t\t\t\t CREATE TABLE pp_enumpart (a pp_colors)\n" + "\t\t\t\t\t\t\tPARTITION BY HASH (a);\n" + "\t\t\t\t\t\t CREATE TABLE pp_enumpart1 PARTITION OF pp_enumpart\n" + "\t\t\t\t\t\t\tFOR VALUES WITH (MODULUS 2, REMAINDER 0);\n" + "\t\t\t\t\t\t CREATE TABLE pp_enumpart2 PARTITION OF pp_enumpart\n" + "\t\t\t\t\t\t\tFOR VALUES WITH (MODULUS 2, REMAINDER 1);\n" + "\t\t\t\t\t\t ALTER TABLE pp_enumpart\n" + '\t\t\t\t\t\t\tRENAME TO "pp_enumpart\nattack";', + "regexp": re.compile(r"\n--[^\n]*\nattack", re.DOTALL), + "like": {}, + } + tests["CREATE TABLESPACE regress_dump_tablespace"] = { + "create_order": 2, + "create_sql": "\n" + "\t\t SET allow_in_place_tablespaces = on;\n" + "\t\t\tCREATE TABLESPACE regress_dump_tablespace\n" + "\t\t\tOWNER regress_dump_test_role LOCATION ''", + "regexp": re.compile( + r"^CREATE TABLESPACE regress_dump_tablespace OWNER regress_dump_test_role LOCATION '';", + re.MULTILINE, + ), + "like": { + "pg_dumpall_dbprivs": 1, + "pg_dumpall_exclude": 1, + "pg_dumpall_globals": 1, + "pg_dumpall_globals_clean": 1, + }, + } + tests["CREATE DATABASE regression_invalid..."] = { + "create_order": 1, + "create_sql": "\n" + "\t\t CREATE DATABASE regression_invalid;\n" + "\t\t\tUPDATE pg_database SET datconnlimit = -2 WHERE datname = 'regression_invalid'", + "regexp": re.compile(r"^CREATE DATABASE regression_invalid", re.MULTILINE), + "like": {}, + } + tests["CREATE ACCESS METHOD gist2"] = { + "create_order": 52, + 
"create_sql": "CREATE ACCESS METHOD gist2 TYPE INDEX HANDLER gisthandler;", + "regexp": re.compile( + r"CREATE ACCESS METHOD gist2 TYPE INDEX HANDLER gisthandler;", re.MULTILINE + ), + "like": {**_full(), "section_pre_data": 1}, + } + tests['CREATE COLLATION test0 FROM "C"'] = { + "create_order": 76, + "create_sql": 'CREATE COLLATION test0 FROM "C";', + "regexp": re.compile( + r"CREATE COLLATION public.test0 \(provider = libc, locale = 'C'(, version = '[^']*')?\);", + re.MULTILINE, + ), + "collation": 1, + "like": {**_full(), "section_pre_data": 1}, + } + tests["CREATE COLLATION icu_collation"] = { + "create_order": 76, + "create_sql": "CREATE COLLATION icu_collation (PROVIDER = icu, LOCALE = 'en-US-u-va-posix');", + "regexp": re.compile( + r"CREATE COLLATION public.icu_collation \(provider = icu, locale = 'en-US-u-va-posix'(, version = '[^']*')?\);", + re.MULTILINE, + ), + "icu": 1, + "like": {**_full(), "section_pre_data": 1}, + } + tests["CREATE CAST FOR timestamptz"] = { + "create_order": 51, + "create_sql": "CREATE CAST (timestamptz AS interval) WITH FUNCTION age(timestamptz) AS ASSIGNMENT;", + "regexp": re.compile( + r"CREATE CAST \(timestamp with time zone AS interval\) WITH FUNCTION pg_catalog\.age\(timestamp with time zone\) AS ASSIGNMENT;", + re.MULTILINE, + ), + "like": {**_full(), "section_pre_data": 1}, + } + tests["CREATE DATABASE postgres"] = { + "regexp": _qr( + [ + ("rx", r"^"), + ("lit", "CREATE DATABASE postgres WITH TEMPLATE = template0 "), + ("rx", r".+;"), + ], + XM, + ), + "like": {"createdb": 1}, + } + tests["CREATE DATABASE dump_test"] = { + "create_order": 47, + "create_sql": "CREATE DATABASE dump_test;", + "regexp": _qr( + [ + ("rx", r"^"), + ("lit", "CREATE DATABASE dump_test WITH TEMPLATE = template0 "), + ("rx", r".+;"), + ], + XM, + ), + "like": {"pg_dumpall_dbprivs": 1}, + } + tests["CREATE DATABASE dump_test2 LOCALE = 'C'"] = { + "create_order": 47, + "create_sql": "CREATE DATABASE dump_test2 LOCALE = 'C' TEMPLATE = template0;", + 
"regexp": _qr( + [ + ("rx", r"^"), + ("lit", "CREATE DATABASE dump_test2 "), + ("rx", r".*"), + ("lit", "LOCALE = 'C';"), + ], + XM, + ), + "like": {"pg_dumpall_dbprivs": 1}, + } + tests["CREATE EXTENSION ... plpgsql"] = { + "regexp": _qr( + [ + ("rx", r"^"), + ( + "lit", + "CREATE EXTENSION IF NOT EXISTS plpgsql WITH SCHEMA pg_catalog;", + ), + ], + XM, + ), + "like": {}, + } + + tests["CREATE AGGREGATE dump_test.newavg"] = { + "create_order": 25, + "create_sql": "CREATE AGGREGATE dump_test.newavg (\n" + "\t\t\t\t\t\t sfunc = int4_avg_accum,\n" + "\t\t\t\t\t\t basetype = int4,\n" + "\t\t\t\t\t\t stype = _int8,\n" + "\t\t\t\t\t\t finalfunc = int8_avg,\n" + "\t\t\t\t\t\t finalfunc_modify = shareable,\n" + "\t\t\t\t\t\t initcond1 = '{0,0}'\n" + "\t\t\t\t\t );", + "regexp": _qr( + [ + ("rx", r"^"), + ("lit", "CREATE AGGREGATE dump_test.newavg(integer) ("), + ("rx", r"\n\s+"), + ("lit", "SFUNC = int4_avg_accum,"), + ("rx", r"\n\s+"), + ("lit", "STYPE = bigint[],"), + ("rx", r"\n\s+"), + ("lit", "INITCOND = '{0,0}',"), + ("rx", r"\n\s+"), + ("lit", "FINALFUNC = int8_avg,"), + ("rx", r"\n\s+"), + ("lit", "FINALFUNC_MODIFY = SHAREABLE"), + ("rx", r"\n\);"), + ], + XM, + ), + "like": { + **_full(), + **_dts(), + "exclude_test_table": 1, + "section_pre_data": 1, + }, + "unlike": { + "exclude_dump_test_schema": 1, + "only_dump_measurement": 1, + }, + } + tests["CREATE CONVERSION dump_test.test_conversion"] = { + "create_order": 78, + "create_sql": "CREATE DEFAULT CONVERSION dump_test.test_conversion FOR 'LATIN1' TO 'UTF8' FROM iso8859_1_to_utf8;", + "regexp": _qr( + [ + ("rx", r"^"), + ( + "lit", + "CREATE DEFAULT CONVERSION dump_test.test_conversion FOR 'LATIN1' TO 'UTF8' FROM iso8859_1_to_utf8;", + ), + ], + XM, + ), + "like": {**_full(), **_dts(), "section_pre_data": 1}, + "unlike": { + "exclude_dump_test_schema": 1, + "only_dump_measurement": 1, + }, + } + tests["CREATE DOMAIN dump_test.us_postal_code"] = { + "create_order": 29, + "create_sql": "CREATE DOMAIN 
dump_test.us_postal_code AS TEXT\n" + '\t\t COLLATE "C"\n' + "\t\t\t\t\t DEFAULT '10014'\n" + "\t\t\t\t\t CONSTRAINT nn NOT NULL\n" + "\t\t\t\t\t CHECK(VALUE ~ '^\\d{5}$' OR\n" + "\t\t\t\t\t\t\t VALUE ~ '^\\d{5}-\\d{4}$');\n" + "\t\t\t\t\t COMMENT ON CONSTRAINT nn\n" + "\t\t\t\t\t\t ON DOMAIN dump_test.us_postal_code IS 'not null';\n" + "\t\t\t\t\t COMMENT ON CONSTRAINT us_postal_code_check\n" + "\t\t\t\t\t\t ON DOMAIN dump_test.us_postal_code IS 'check it';", + "regexp": _qr( + [ + ("rx", r"^"), + ( + "lit", + "CREATE DOMAIN dump_test.us_postal_code AS text COLLATE pg_catalog.\"C\" CONSTRAINT nn NOT NULL DEFAULT '10014'::text", + ), + ("rx", r"\n\s+"), + ("lit", "CONSTRAINT us_postal_code_check CHECK "), + ("lit", "(((VALUE ~ '^\\d{5}"), + ("rx", r"\$"), + ("lit", "'::text) OR (VALUE ~ '^\\d{5}-\\d{4}"), + ("rx", r"\$"), + ("lit", "'::text)));"), + ("rx", r"(.|\n)*"), + ], + XM, + ), + "like": {**_full(), **_dts(), "section_pre_data": 1}, + "unlike": { + "exclude_dump_test_schema": 1, + "only_dump_measurement": 1, + }, + } + tests["COMMENT ON CONSTRAINT ON DOMAIN (1)"] = { + "regexp": _qr( + [ + ("rx", r"^"), + ( + "lit", + "COMMENT ON CONSTRAINT nn ON DOMAIN dump_test.us_postal_code IS 'not null';", + ), + ], + XM, + ), + "like": {**_full(), **_dts(), "section_pre_data": 1}, + "unlike": { + "exclude_dump_test_schema": 1, + "only_dump_measurement": 1, + }, + } + tests["COMMENT ON CONSTRAINT ON DOMAIN (2)"] = { + "regexp": _qr( + [ + ("rx", r"^"), + ( + "lit", + "COMMENT ON CONSTRAINT us_postal_code_check ON DOMAIN dump_test.us_postal_code IS 'check it';", + ), + ], + XM, + ), + "like": {**_full(), **_dts(), "section_pre_data": 1}, + "unlike": { + "exclude_dump_test_schema": 1, + "only_dump_measurement": 1, + }, + } + tests["CREATE FUNCTION dump_test.pltestlang_call_handler"] = { + "create_order": 17, + "create_sql": "CREATE FUNCTION dump_test.pltestlang_call_handler()\n" + "\t\t\t\t\t RETURNS LANGUAGE_HANDLER AS '$libdir/plpgsql',\n" + "\t\t\t\t\t 
'plpgsql_call_handler' LANGUAGE C;", + "regexp": _qr( + [ + ("rx", r"^"), + ("lit", "CREATE FUNCTION dump_test.pltestlang_call_handler() "), + ("lit", "RETURNS language_handler"), + ("rx", r"\n\s+"), + ("lit", "LANGUAGE c"), + ("rx", r"\n\s+AS\ \'\$"), + ("lit", "libdir/plpgsql', 'plpgsql_call_handler';"), + ], + XM, + ), + "like": {**_full(), **_dts(), "section_pre_data": 1}, + "unlike": { + "exclude_dump_test_schema": 1, + "only_dump_measurement": 1, + }, + } + tests["CREATE FUNCTION dump_test.trigger_func"] = { + "create_order": 30, + "create_sql": "CREATE FUNCTION dump_test.trigger_func()\n" + "\t\t\t\t\t RETURNS trigger LANGUAGE plpgsql\n" + "\t\t\t\t\t AS $$ BEGIN RETURN NULL; END;$$;", + "regexp": _qr( + [ + ("rx", r"^"), + ("lit", "CREATE FUNCTION dump_test.trigger_func() RETURNS trigger"), + ("rx", r"\n\s+"), + ("lit", "LANGUAGE plpgsql"), + ("rx", r"\n\s+AS\ \$\$"), + ("lit", " BEGIN RETURN NULL; END;"), + ("rx", r"\$\$;"), + ], + XM, + ), + "like": {**_full(), **_dts(), "section_pre_data": 1}, + "unlike": { + "exclude_dump_test_schema": 1, + "only_dump_measurement": 1, + }, + } + tests["CREATE FUNCTION dump_test.event_trigger_func"] = { + "create_order": 32, + "create_sql": "CREATE FUNCTION dump_test.event_trigger_func()\n" + "\t\t\t\t\t RETURNS event_trigger LANGUAGE plpgsql\n" + "\t\t\t\t\t AS $$ BEGIN RETURN; END;$$;", + "regexp": _qr( + [ + ("rx", r"^"), + ( + "lit", + "CREATE FUNCTION dump_test.event_trigger_func() RETURNS event_trigger", + ), + ("rx", r"\n\s+"), + ("lit", "LANGUAGE plpgsql"), + ("rx", r"\n\s+AS\ \$\$"), + ("lit", " BEGIN RETURN; END;"), + ("rx", r"\$\$;"), + ], + XM, + ), + "like": {**_full(), **_dts(), "section_pre_data": 1}, + "unlike": { + "exclude_dump_test_schema": 1, + "only_dump_measurement": 1, + }, + } + tests["CREATE OPERATOR FAMILY dump_test.op_family"] = { + "create_order": 73, + "create_sql": "CREATE OPERATOR FAMILY dump_test.op_family USING btree;", + "regexp": _qr( + [ + ("rx", r"^"), + ("lit", "CREATE OPERATOR 
FAMILY dump_test.op_family USING btree;"), + ], + XM, + ), + "like": {**_full(), **_dts(), "section_pre_data": 1}, + "unlike": { + "exclude_dump_test_schema": 1, + "only_dump_measurement": 1, + }, + } + + tests["CREATE OPERATOR CLASS dump_test.op_class"] = { + "create_order": 74, + "create_sql": "CREATE OPERATOR CLASS dump_test.op_class\n" + "\t\t FOR TYPE bigint USING btree FAMILY dump_test.op_family\n" + "\t\t\t\t\t\t AS STORAGE bigint,\n" + "\t\t\t\t\t\t OPERATOR 1 <(bigint,bigint),\n" + "\t\t\t\t\t\t OPERATOR 2 <=(bigint,bigint),\n" + "\t\t\t\t\t\t OPERATOR 3 =(bigint,bigint),\n" + "\t\t\t\t\t\t OPERATOR 4 >=(bigint,bigint),\n" + "\t\t\t\t\t\t OPERATOR 5 >(bigint,bigint),\n" + "\t\t\t\t\t\t FUNCTION 1 btint8cmp(bigint,bigint),\n" + "\t\t\t\t\t\t FUNCTION 2 btint8sortsupport(internal),\n" + "\t\t\t\t\t\t FUNCTION 4 btequalimage(oid);", + "regexp": _qr( + [ + ("rx", r"^"), + ("lit", "CREATE OPERATOR CLASS dump_test.op_class"), + ("rx", r"\n\s+"), + ("lit", "FOR TYPE bigint USING btree FAMILY dump_test.op_family AS"), + ("rx", r"\n\s+"), + ("lit", "OPERATOR 1 <(bigint,bigint) ,"), + ("rx", r"\n\s+"), + ("lit", "OPERATOR 2 <=(bigint,bigint) ,"), + ("rx", r"\n\s+"), + ("lit", "OPERATOR 3 =(bigint,bigint) ,"), + ("rx", r"\n\s+"), + ("lit", "OPERATOR 4 >=(bigint,bigint) ,"), + ("rx", r"\n\s+"), + ("lit", "OPERATOR 5 >(bigint,bigint) ,"), + ("rx", r"\n\s+"), + ("lit", "FUNCTION 1 (bigint, bigint) btint8cmp(bigint,bigint);"), + ], + XM, + ), + "like": {**_full(), **_dts(), "section_pre_data": 1}, + "unlike": { + "exclude_dump_test_schema": 1, + "only_dump_measurement": 1, + }, + } + tests["CREATE OPERATOR CLASS dump_test.op_class_custom"] = { + "create_order": 74, + "create_sql": "CREATE OPERATOR dump_test.~~ (\n" + "\t\t\t\t\t\t\t PROCEDURE = int4eq,\n" + "\t\t\t\t\t\t\t LEFTARG = int,\n" + "\t\t\t\t\t\t\t RIGHTARG = int);\n" + "\t\t\t\t\t\t CREATE OPERATOR CLASS dump_test.op_class_custom\n" + "\t\t\t\t\t\t\t FOR TYPE int USING btree AS\n" + "\t\t\t\t\t\t\t OPERATOR 3 
dump_test.~~;\n" + "\t\t\t\t\t\t CREATE TYPE dump_test.range_type_custom AS RANGE (\n" + "\t\t\t\t\t\t\t subtype = int,\n" + "\t\t\t\t\t\t\t subtype_opclass = dump_test.op_class_custom);", + "regexp": _qr( + [ + ("rx", r"^"), + ("lit", "CREATE OPERATOR dump_test.~~ ("), + ("rx", r"\n.+"), + ( + "lit", + "CREATE OPERATOR FAMILY dump_test.op_class_custom USING btree;", + ), + ("rx", r"\n.+"), + ("lit", "CREATE OPERATOR CLASS dump_test.op_class_custom"), + ("rx", r"\n\s+"), + ( + "lit", + "FOR TYPE integer USING btree FAMILY dump_test.op_class_custom AS", + ), + ("rx", r"\n\s+"), + ("lit", "OPERATOR 3 dump_test.~~(integer,integer);"), + ("rx", r"\n.+"), + ("lit", "CREATE TYPE dump_test.range_type_custom AS RANGE ("), + ("rx", r"\n\s+"), + ("lit", "subtype = integer,"), + ("rx", r"\n\s+"), + ("lit", "multirange_type_name = dump_test.multirange_type_custom,"), + ("rx", r"\n\s+"), + ("lit", "subtype_opclass = dump_test.op_class_custom"), + ("rx", r"\n"), + ("lit", ");"), + ], + XMS, + ), + "like": {**_full(), **_dts(), "section_pre_data": 1}, + "unlike": { + "exclude_dump_test_schema": 1, + "only_dump_measurement": 1, + }, + } + tests["CREATE OPERATOR CLASS dump_test.op_class_empty"] = { + "create_order": 89, + "create_sql": "CREATE OPERATOR CLASS dump_test.op_class_empty\n" + "\t\t FOR TYPE bigint USING btree FAMILY dump_test.op_family\n" + "\t\t\t\t\t\t AS STORAGE bigint;", + "regexp": _qr( + [ + ("rx", r"^"), + ("lit", "CREATE OPERATOR CLASS dump_test.op_class_empty"), + ("rx", r"\n\s+"), + ("lit", "FOR TYPE bigint USING btree FAMILY dump_test.op_family AS"), + ("rx", r"\n\s+"), + ("lit", "STORAGE bigint;"), + ], + XM, + ), + "like": {**_full(), **_dts(), "section_pre_data": 1}, + "unlike": { + "exclude_dump_test_schema": 1, + "only_dump_measurement": 1, + }, + } + tests["CREATE EVENT TRIGGER test_event_trigger"] = { + "create_order": 33, + "create_sql": "CREATE EVENT TRIGGER test_event_trigger\n" + "\t\t\t\t\t ON ddl_command_start\n" + "\t\t\t\t\t EXECUTE FUNCTION 
dump_test.event_trigger_func();", + "regexp": _qr( + [ + ("rx", r"^"), + ("lit", "CREATE EVENT TRIGGER test_event_trigger "), + ("lit", "ON ddl_command_start"), + ("rx", r"\n\s+"), + ("lit", "EXECUTE FUNCTION dump_test.event_trigger_func();"), + ], + XM, + ), + "like": {**_full(), "section_post_data": 1}, + } + tests["CREATE TRIGGER test_trigger"] = { + "create_order": 31, + "create_sql": "CREATE TRIGGER test_trigger\n" + "\t\t\t\t\t BEFORE INSERT ON dump_test.test_table\n" + "\t\t\t\t\t FOR EACH ROW WHEN (NEW.col1 > 10)\n" + "\t\t\t\t\t EXECUTE FUNCTION dump_test.trigger_func();", + "regexp": _qr( + [ + ("rx", r"^"), + ( + "lit", + "CREATE TRIGGER test_trigger BEFORE INSERT ON dump_test.test_table ", + ), + ("lit", "FOR EACH ROW WHEN ((new.col1 > 10)) "), + ("lit", "EXECUTE FUNCTION dump_test.trigger_func();"), + ], + XM, + ), + "like": { + **_full(), + **_dts(), + "only_dump_test_table": 1, + "section_post_data": 1, + }, + "unlike": { + "exclude_test_table": 1, + "exclude_dump_test_schema": 1, + "only_dump_measurement": 1, + }, + } + tests["CREATE TYPE dump_test.planets AS ENUM"] = { + "create_order": 37, + "create_sql": "CREATE TYPE dump_test.planets\n" + "\t\t\t\t\t AS ENUM ( 'venus', 'earth', 'mars' );", + "regexp": _qr( + [ + ("rx", r"^"), + ("lit", "CREATE TYPE dump_test.planets AS ENUM ("), + ("rx", r"\n\s+'venus',\n\s+'earth',\n\s+'mars'\n\);"), + ], + XM, + ), + "like": {**_full(), **_dts(), "section_pre_data": 1}, + "unlike": { + "binary_upgrade": 1, + "exclude_dump_test_schema": 1, + "only_dump_measurement": 1, + }, + } + tests["CREATE TYPE dump_test.planets AS ENUM pg_upgrade"] = { + "regexp": _qr( + [ + ("rx", r"^"), + ("lit", "CREATE TYPE dump_test.planets AS ENUM ("), + ("rx", r"\n\);.*^"), + ("lit", "ALTER TYPE dump_test.planets ADD VALUE 'venus';"), + ("rx", r"\n.*^"), + ("lit", "ALTER TYPE dump_test.planets ADD VALUE 'earth';"), + ("rx", r"\n.*^"), + ("lit", "ALTER TYPE dump_test.planets ADD VALUE 'mars';"), + ("rx", r"\n"), + ], + XMS, + ), + 
"like": {"binary_upgrade": 1}, + } + tests["CREATE TYPE dump_test.textrange AS RANGE"] = { + "create_order": 38, + "create_sql": "CREATE TYPE dump_test.textrange\n" + '\t\t\t\t\t AS RANGE (subtype=text, collation="C");', + "regexp": _qr( + [ + ("rx", r"^"), + ("lit", "CREATE TYPE dump_test.textrange AS RANGE ("), + ("rx", r"\n\s+"), + ("lit", "subtype = text,"), + ("rx", r"\n\s+"), + ("lit", "multirange_type_name = dump_test.textmultirange,"), + ("rx", r"\n\s+"), + ("lit", 'collation = pg_catalog."C"'), + ("rx", r"\n\);"), + ], + XM, + ), + "like": {**_full(), **_dts(), "section_pre_data": 1}, + "unlike": { + "exclude_dump_test_schema": 1, + "only_dump_measurement": 1, + }, + } + tests["CREATE TYPE dump_test.int42"] = { + "create_order": 39, + "create_sql": "CREATE TYPE dump_test.int42;", + "regexp": _qr( + [("rx", r"^"), ("lit", "CREATE TYPE dump_test.int42;")], re.MULTILINE + ), + "like": {**_full(), **_dts(), "section_pre_data": 1}, + "unlike": { + "exclude_dump_test_schema": 1, + "only_dump_measurement": 1, + }, + } + + tests["CREATE TEXT SEARCH CONFIGURATION dump_test.alt_ts_conf1"] = { + "create_order": 80, + "create_sql": "CREATE TEXT SEARCH CONFIGURATION dump_test.alt_ts_conf1 (copy=english);", + "regexp": _qr( + [ + ("rx", r"^"), + ("lit", "CREATE TEXT SEARCH CONFIGURATION dump_test.alt_ts_conf1 ("), + ("rx", r"\n\s+"), + ("lit", 'PARSER = pg_catalog."default" );'), + ], + XM, + ), + "like": {**_full(), **_dts(), "section_pre_data": 1}, + "unlike": { + "exclude_dump_test_schema": 1, + "only_dump_measurement": 1, + }, + } + _ts_conf_maps = [ + ("asciiword", "english_stem"), + ("word", "english_stem"), + ("numword", "simple"), + ("email", "simple"), + ("url", "simple"), + ("host", "simple"), + ("sfloat", "simple"), + ("version", "simple"), + ("hword_numpart", "simple"), + ("hword_part", "english_stem"), + ("hword_asciipart", "english_stem"), + ("numhword", "simple"), + ("asciihword", "english_stem"), + ("hword", "english_stem"), + ("url_path", "simple"), + 
("file", "simple"), + ('"float"', "simple"), + ('"int"', "simple"), + ("uint", "simple"), + ] + _ts_conf_parts: List[_Segment] = [("rx", r"^")] + for _tok, _dict in _ts_conf_maps: + _ts_conf_parts += [ + ("lit", "ALTER TEXT SEARCH CONFIGURATION dump_test.alt_ts_conf1"), + ("rx", r"\n\s+"), + ("lit", f"ADD MAPPING FOR {_tok} WITH {_dict};"), + ("rx", r"\n\n"), + ] + tests["ALTER TEXT SEARCH CONFIGURATION dump_test.alt_ts_conf1 ..."] = { + "regexp": _qr(_ts_conf_parts, XM), + "like": {**_full(), **_dts(), "section_pre_data": 1}, + "unlike": { + "exclude_dump_test_schema": 1, + "only_dump_measurement": 1, + }, + } + tests["CREATE TEXT SEARCH TEMPLATE dump_test.alt_ts_temp1"] = { + "create_order": 81, + "create_sql": "CREATE TEXT SEARCH TEMPLATE dump_test.alt_ts_temp1 (lexize=dsimple_lexize);", + "regexp": _qr( + [ + ("rx", r"^"), + ("lit", "CREATE TEXT SEARCH TEMPLATE dump_test.alt_ts_temp1 ("), + ("rx", r"\n\s+"), + ("lit", "LEXIZE = dsimple_lexize );"), + ], + XM, + ), + "like": {**_full(), **_dts(), "section_pre_data": 1}, + "unlike": { + "exclude_dump_test_schema": 1, + "only_dump_measurement": 1, + }, + } + tests["CREATE TEXT SEARCH PARSER dump_test.alt_ts_prs1"] = { + "create_order": 82, + "create_sql": "CREATE TEXT SEARCH PARSER dump_test.alt_ts_prs1\n" + "\t\t(start = prsd_start, gettoken = prsd_nexttoken, end = prsd_end, lextypes = prsd_lextype);", + "regexp": _qr( + [ + ("rx", r"^"), + ("lit", "CREATE TEXT SEARCH PARSER dump_test.alt_ts_prs1 ("), + ("rx", r"\n\s+"), + ("lit", "START = prsd_start,"), + ("rx", r"\n\s+"), + ("lit", "GETTOKEN = prsd_nexttoken,"), + ("rx", r"\n\s+"), + ("lit", "END = prsd_end,"), + ("rx", r"\n\s+"), + ("lit", "LEXTYPES = prsd_lextype );"), + ("rx", r"\n"), + ], + XM, + ), + "like": {**_full(), **_dts(), "section_pre_data": 1}, + "unlike": { + "exclude_dump_test_schema": 1, + "only_dump_measurement": 1, + }, + } + tests["CREATE TEXT SEARCH DICTIONARY dump_test.alt_ts_dict1"] = { + "create_order": 83, + "create_sql": "CREATE TEXT 
SEARCH DICTIONARY dump_test.alt_ts_dict1 (template=simple);", + "regexp": _qr( + [ + ("rx", r"^"), + ("lit", "CREATE TEXT SEARCH DICTIONARY dump_test.alt_ts_dict1 ("), + ("rx", r"\n\s+"), + ("lit", "TEMPLATE = pg_catalog.simple );"), + ("rx", r"\n"), + ], + XM, + ), + "like": {**_full(), **_dts(), "section_pre_data": 1}, + "unlike": { + "exclude_dump_test_schema": 1, + "only_dump_measurement": 1, + }, + } + tests["CREATE FUNCTION dump_test.int42_in"] = { + "create_order": 40, + "create_sql": "CREATE FUNCTION dump_test.int42_in(cstring)\n" + "\t\t\t\t\t RETURNS dump_test.int42 AS 'int4in'\n" + "\t\t\t\t\t LANGUAGE internal STRICT IMMUTABLE;", + "regexp": _qr( + [ + ("rx", r"^"), + ( + "lit", + "CREATE FUNCTION dump_test.int42_in(cstring) RETURNS dump_test.int42", + ), + ("rx", r"\n\s+"), + ("lit", "LANGUAGE internal IMMUTABLE STRICT"), + ("rx", r"\n\s+AS\ \$\$int4in\$\$;"), + ], + XM, + ), + "like": {**_full(), **_dts(), "section_pre_data": 1}, + "unlike": { + "exclude_dump_test_schema": 1, + "only_dump_measurement": 1, + }, + } + tests["CREATE FUNCTION dump_test.int42_out"] = { + "create_order": 41, + "create_sql": "CREATE FUNCTION dump_test.int42_out(dump_test.int42)\n" + "\t\t\t\t\t RETURNS cstring AS 'int4out'\n" + "\t\t\t\t\t LANGUAGE internal STRICT IMMUTABLE;", + "regexp": _qr( + [ + ("rx", r"^"), + ( + "lit", + "CREATE FUNCTION dump_test.int42_out(dump_test.int42) RETURNS cstring", + ), + ("rx", r"\n\s+"), + ("lit", "LANGUAGE internal IMMUTABLE STRICT"), + ("rx", r"\n\s+AS\ \$\$int4out\$\$;"), + ], + XM, + ), + "like": {**_full(), **_dts(), "section_pre_data": 1}, + "unlike": { + "exclude_dump_test_schema": 1, + "only_dump_measurement": 1, + }, + } + tests["CREATE FUNCTION ... 
SUPPORT"] = { + "create_order": 41, + "create_sql": "CREATE FUNCTION dump_test.func_with_support() RETURNS int LANGUAGE sql AS $$ SELECT 1 $$ SUPPORT varchar_support;", + "regexp": _qr( + [ + ("rx", r"^"), + ( + "lit", + "CREATE FUNCTION dump_test.func_with_support() RETURNS integer", + ), + ("rx", r"\n\s+"), + ("lit", "LANGUAGE sql SUPPORT varchar_support"), + ("rx", r"\n\s+AS\ \$\$"), + ("lit", " SELECT 1 "), + ("rx", r"\$\$;"), + ], + XM, + ), + "like": {**_full(), **_dts(), "section_pre_data": 1}, + "unlike": { + "exclude_dump_test_schema": 1, + "only_dump_measurement": 1, + }, + } + tests["Check ordering of a function that depends on a primary key"] = { + "create_order": 41, + "create_sql": "\n" + "\t\t\tCREATE TABLE dump_test.ordering_table (id int primary key, data int);\n" + "\t\t\tCREATE FUNCTION dump_test.ordering_func ()\n" + "\t\t\tRETURNS SETOF dump_test.ordering_table\n" + "\t\t\tLANGUAGE sql BEGIN ATOMIC\n" + "\t\t\tSELECT * FROM dump_test.ordering_table GROUP BY id; END;", + "regexp": _qr( + [ + ("rx", r"^"), + ("lit", "ALTER TABLE ONLY dump_test.ordering_table"), + ("rx", r"\n\s+"), + ("lit", "ADD CONSTRAINT ordering_table_pkey PRIMARY KEY (id);"), + ("rx", r".*^"), + ("lit", "CREATE FUNCTION dump_test.ordering_func"), + ], + XMS, + ), + "like": {**_full(), **_dts(), "section_post_data": 1}, + "unlike": { + "exclude_dump_test_schema": 1, + "only_dump_measurement": 1, + }, + } + tests["CREATE PROCEDURE dump_test.ptest1"] = { + "create_order": 41, + "create_sql": "CREATE PROCEDURE dump_test.ptest1(a int)\n" + "\t\t\t\t\t LANGUAGE SQL AS $$ INSERT INTO dump_test.test_table (col1) VALUES (a) $$;", + "regexp": _qr( + [ + ("rx", r"^"), + ("lit", "CREATE PROCEDURE dump_test.ptest1(IN a integer)"), + ("rx", r"\n\s+"), + ("lit", "LANGUAGE sql"), + ("rx", r"\n\s+AS\ \$\$"), + ("lit", " INSERT INTO dump_test.test_table (col1) VALUES (a) "), + ("rx", r"\$\$;"), + ], + XM, + ), + "like": {**_full(), **_dts(), "section_pre_data": 1}, + "unlike": { + 
"exclude_dump_test_schema": 1, + "only_dump_measurement": 1, + }, + } + + tests["CREATE TYPE dump_test.int42 populated"] = { + "create_order": 42, + "create_sql": "CREATE TYPE dump_test.int42 (\n" + "\t\t\t\t\t\t internallength = 4,\n" + "\t\t\t\t\t\t input = dump_test.int42_in,\n" + "\t\t\t\t\t\t output = dump_test.int42_out,\n" + "\t\t\t\t\t\t alignment = int4,\n" + "\t\t\t\t\t\t default = 42,\n" + "\t\t\t\t\t\t passedbyvalue);", + "regexp": _qr( + [ + ("rx", r"^"), + ("lit", "CREATE TYPE dump_test.int42 ("), + ("rx", r"\n\s+"), + ("lit", "INTERNALLENGTH = 4,"), + ("rx", r"\n\s+"), + ("lit", "INPUT = dump_test.int42_in,"), + ("rx", r"\n\s+"), + ("lit", "OUTPUT = dump_test.int42_out,"), + ("rx", r"\n\s+"), + ("lit", "DEFAULT = '42',"), + ("rx", r"\n\s+"), + ("lit", "ALIGNMENT = int4,"), + ("rx", r"\n\s+"), + ("lit", "STORAGE = plain,"), + ("rx", r"\n\s+PASSEDBYVALUE\n\);"), + ], + XM, + ), + "like": {**_full(), **_dts(), "section_pre_data": 1}, + "unlike": { + "exclude_dump_test_schema": 1, + "only_dump_measurement": 1, + }, + } + tests["CREATE TYPE dump_test.composite"] = { + "create_order": 43, + "create_sql": "CREATE TYPE dump_test.composite AS (\n" + "\t\t\t\t\t\t f1 int,\n" + "\t\t\t\t\t\t f2 dump_test.int42\n" + "\t\t\t\t\t );", + "regexp": _qr( + [ + ("rx", r"^"), + ("lit", "CREATE TYPE dump_test.composite AS ("), + ("rx", r"\n\s+"), + ("lit", "f1 integer,"), + ("rx", r"\n\s+"), + ("lit", "f2 dump_test.int42"), + ("rx", r"\n\);"), + ], + XM, + ), + "like": {**_full(), **_dts(), "section_pre_data": 1}, + "unlike": { + "exclude_dump_test_schema": 1, + "only_dump_measurement": 1, + }, + } + tests["CREATE TYPE dump_test.undefined"] = { + "create_order": 39, + "create_sql": "CREATE TYPE dump_test.undefined;", + "regexp": _qr( + [("rx", r"^"), ("lit", "CREATE TYPE dump_test.undefined;")], re.MULTILINE + ), + "like": {**_full(), **_dts(), "section_pre_data": 1}, + "unlike": { + "exclude_dump_test_schema": 1, + "only_dump_measurement": 1, + }, + } + tests["CREATE 
FOREIGN DATA WRAPPER dummy"] = { + "create_order": 35, + "create_sql": "CREATE FOREIGN DATA WRAPPER dummy;", + "regexp": re.compile(r"CREATE FOREIGN DATA WRAPPER dummy;", re.MULTILINE), + "like": {**_full(), "section_pre_data": 1}, + } + tests["CREATE SERVER s1 FOREIGN DATA WRAPPER dummy"] = { + "create_order": 36, + "create_sql": "CREATE SERVER s1 FOREIGN DATA WRAPPER dummy;", + "regexp": re.compile( + r"CREATE SERVER s1 FOREIGN DATA WRAPPER dummy;", re.MULTILINE + ), + "like": {**_full(), "section_pre_data": 1}, + } + tests["CREATE FOREIGN TABLE dump_test.foreign_table SERVER s1"] = { + "create_order": 88, + "create_sql": "CREATE FOREIGN TABLE dump_test.foreign_table (c1 int options (column_name 'col1'))\n" + "\t\t\t\t\t\tSERVER s1 OPTIONS (schema_name 'x1');", + "regexp": _qr( + [ + ("lit", "CREATE FOREIGN TABLE dump_test.foreign_table ("), + ("rx", r"\n\s+"), + ("lit", "c1 integer"), + ("rx", r"\n"), + ("lit", ")"), + ("rx", r"\n"), + ("lit", "SERVER s1"), + ("rx", r"\n"), + ("lit", "OPTIONS ("), + ("rx", r"\n\s+"), + ("lit", "schema_name 'x1'"), + ("rx", r"\n"), + ("lit", ");"), + ("rx", r"\n"), + ], + XM, + ), + "like": {**_full(), **_dts(), "section_pre_data": 1}, + "unlike": { + "exclude_dump_test_schema": 1, + "only_dump_measurement": 1, + }, + } + tests["CREATE USER MAPPING FOR regress_dump_test_role SERVER s1"] = { + "create_order": 86, + "create_sql": "CREATE USER MAPPING FOR regress_dump_test_role SERVER s1;", + "regexp": re.compile( + r"CREATE USER MAPPING FOR regress_dump_test_role SERVER s1;", re.MULTILINE + ), + "like": {**_full(), "section_pre_data": 1}, + } + tests["CREATE TRANSFORM FOR int"] = { + "create_order": 34, + "create_sql": "CREATE TRANSFORM FOR int LANGUAGE SQL (FROM SQL WITH FUNCTION prsd_lextype(internal), TO SQL WITH FUNCTION int4recv(internal));", + "regexp": re.compile( + r"CREATE TRANSFORM FOR integer LANGUAGE sql \(FROM SQL WITH FUNCTION pg_catalog\.prsd_lextype\(internal\), TO SQL WITH FUNCTION 
pg_catalog\.int4recv\(internal\)\);", + re.MULTILINE, + ), + "like": {**_full(), "section_pre_data": 1}, + } + tests["CREATE LANGUAGE pltestlang"] = { + "create_order": 18, + "create_sql": "CREATE LANGUAGE pltestlang\n" + "\t\t\t\t\t HANDLER dump_test.pltestlang_call_handler;", + "regexp": _qr( + [ + ("rx", r"^"), + ("lit", "CREATE PROCEDURAL LANGUAGE pltestlang "), + ("lit", "HANDLER dump_test.pltestlang_call_handler;"), + ], + XM, + ), + "like": {**_full(), "section_pre_data": 1}, + "unlike": {"exclude_dump_test_schema": 1}, + } + tests["CREATE MATERIALIZED VIEW matview"] = { + "create_order": 20, + "create_sql": "CREATE MATERIALIZED VIEW dump_test.matview (col1) AS\n" + "\t\t\t\t\t SELECT col1 FROM dump_test.test_table;", + "regexp": _qr( + [ + ("rx", r"^"), + ("lit", "CREATE MATERIALIZED VIEW dump_test.matview AS"), + ("rx", r"\n\s+"), + ("lit", "SELECT col1"), + ("rx", r"\n\s+"), + ("lit", "FROM dump_test.test_table"), + ("rx", r"\n\s+"), + ("lit", "WITH NO DATA;"), + ], + XM, + ), + "like": {**_full(), **_dts(), "section_pre_data": 1}, + "unlike": { + "exclude_dump_test_schema": 1, + "only_dump_measurement": 1, + }, + } + tests["CREATE MATERIALIZED VIEW matview_second"] = { + "create_order": 21, + "create_sql": "CREATE MATERIALIZED VIEW\n" + "\t\t\t\t\t\t dump_test.matview_second (col1) AS\n" + "\t\t\t\t\t\t SELECT * FROM dump_test.matview;", + "regexp": _qr( + [ + ("rx", r"^"), + ("lit", "CREATE MATERIALIZED VIEW dump_test.matview_second AS"), + ("rx", r"\n\s+"), + ("lit", "SELECT col1"), + ("rx", r"\n\s+"), + ("lit", "FROM dump_test.matview"), + ("rx", r"\n\s+"), + ("lit", "WITH NO DATA;"), + ], + XM, + ), + "like": {**_full(), **_dts(), "section_pre_data": 1}, + "unlike": { + "exclude_dump_test_schema": 1, + "only_dump_measurement": 1, + }, + } + tests["CREATE MATERIALIZED VIEW matview_third"] = { + "create_order": 58, + "create_sql": "CREATE MATERIALIZED VIEW\n" + "\t\t\t\t\t\t dump_test.matview_third (col1) AS\n" + "\t\t\t\t\t\t SELECT * FROM 
dump_test.matview_second WITH NO DATA;", + "regexp": _qr( + [ + ("rx", r"^"), + ("lit", "CREATE MATERIALIZED VIEW dump_test.matview_third AS"), + ("rx", r"\n\s+"), + ("lit", "SELECT col1"), + ("rx", r"\n\s+"), + ("lit", "FROM dump_test.matview_second"), + ("rx", r"\n\s+"), + ("lit", "WITH NO DATA;"), + ], + XM, + ), + "like": {**_full(), **_dts(), "section_pre_data": 1}, + "unlike": { + "exclude_dump_test_schema": 1, + "only_dump_measurement": 1, + }, + } + tests["CREATE MATERIALIZED VIEW matview_fourth"] = { + "create_order": 59, + "create_sql": "CREATE MATERIALIZED VIEW\n" + "\t\t\t\t\t\t dump_test.matview_fourth (col1) AS\n" + "\t\t\t\t\t\t SELECT * FROM dump_test.matview_third WITH NO DATA;", + "regexp": _qr( + [ + ("rx", r"^"), + ("lit", "CREATE MATERIALIZED VIEW dump_test.matview_fourth AS"), + ("rx", r"\n\s+"), + ("lit", "SELECT col1"), + ("rx", r"\n\s+"), + ("lit", "FROM dump_test.matview_third"), + ("rx", r"\n\s+"), + ("lit", "WITH NO DATA;"), + ], + XM, + ), + "like": {**_full(), **_dts(), "section_pre_data": 1}, + "unlike": { + "exclude_dump_test_schema": 1, + "only_dump_measurement": 1, + }, + } + tests["Check ordering of a matview that depends on a primary key"] = { + "create_order": 42, + "create_sql": "\n" + "\t\t\tCREATE MATERIALIZED VIEW dump_test.ordering_view AS\n" + "\t\t\t\tSELECT * FROM dump_test.ordering_table GROUP BY id;", + "regexp": _qr( + [ + ("rx", r"^"), + ("lit", "ALTER TABLE ONLY dump_test.ordering_table"), + ("rx", r"\n\s+"), + ("lit", "ADD CONSTRAINT ordering_table_pkey PRIMARY KEY (id);"), + ("rx", r".*^"), + ("lit", "CREATE MATERIALIZED VIEW dump_test.ordering_view AS"), + ("rx", r"\n\s+"), + ("lit", "SELECT id,"), + ], + XMS, + ), + "like": {**_full(), **_dts(), "section_post_data": 1}, + "unlike": { + "exclude_dump_test_schema": 1, + "only_dump_measurement": 1, + }, + } + + tests["CREATE POLICY p1 ON test_table"] = { + "create_order": 22, + "create_sql": "CREATE POLICY p1 ON dump_test.test_table\n" + "\t\t\t\t\t\t USING 
(true)\n" + "\t\t\t\t\t\t WITH CHECK (true);", + "regexp": _qr( + [ + ("rx", r"^"), + ("lit", "CREATE POLICY p1 ON dump_test.test_table "), + ("lit", "USING (true) WITH CHECK (true);"), + ], + XM, + ), + "like": { + **_full(), + **_dts(), + "only_dump_test_table": 1, + "section_post_data": 1, + }, + "unlike": { + "exclude_dump_test_schema": 1, + "exclude_test_table": 1, + "no_policies": 1, + "no_policies_restore": 1, + "only_dump_measurement": 1, + }, + } + tests["CREATE POLICY p2 ON test_table FOR SELECT"] = { + "create_order": 24, + "create_sql": "CREATE POLICY p2 ON dump_test.test_table\n" + "\t\t\t\t\t\t FOR SELECT TO regress_dump_test_role USING (true);", + "regexp": _qr( + [ + ("rx", r"^"), + ( + "lit", + "CREATE POLICY p2 ON dump_test.test_table FOR SELECT TO regress_dump_test_role ", + ), + ("lit", "USING (true);"), + ], + XM, + ), + "like": { + **_full(), + **_dts(), + "only_dump_test_table": 1, + "section_post_data": 1, + }, + "unlike": { + "exclude_dump_test_schema": 1, + "exclude_test_table": 1, + "no_policies": 1, + "no_policies_restore": 1, + "only_dump_measurement": 1, + }, + } + tests["CREATE POLICY p3 ON test_table FOR INSERT"] = { + "create_order": 25, + "create_sql": "CREATE POLICY p3 ON dump_test.test_table\n" + "\t\t\t\t\t\t FOR INSERT TO regress_dump_test_role WITH CHECK (true);", + "regexp": _qr( + [ + ("rx", r"^"), + ("lit", "CREATE POLICY p3 ON dump_test.test_table FOR INSERT "), + ("lit", "TO regress_dump_test_role WITH CHECK (true);"), + ], + XM, + ), + "like": { + **_full(), + **_dts(), + "only_dump_test_table": 1, + "section_post_data": 1, + }, + "unlike": { + "exclude_dump_test_schema": 1, + "exclude_test_table": 1, + "no_policies": 1, + "no_policies_restore": 1, + "only_dump_measurement": 1, + }, + } + tests["CREATE POLICY p4 ON test_table FOR UPDATE"] = { + "create_order": 26, + "create_sql": "CREATE POLICY p4 ON dump_test.test_table FOR UPDATE\n" + "\t\t\t\t\t\t TO regress_dump_test_role USING (true) WITH CHECK (true);", + "regexp": 
_qr( + [ + ("rx", r"^"), + ( + "lit", + "CREATE POLICY p4 ON dump_test.test_table FOR UPDATE TO regress_dump_test_role ", + ), + ("lit", "USING (true) WITH CHECK (true);"), + ], + XM, + ), + "like": { + **_full(), + **_dts(), + "only_dump_test_table": 1, + "section_post_data": 1, + }, + "unlike": { + "exclude_dump_test_schema": 1, + "exclude_test_table": 1, + "no_policies": 1, + "no_policies_restore": 1, + "only_dump_measurement": 1, + }, + } + tests["CREATE POLICY p5 ON test_table FOR DELETE"] = { + "create_order": 27, + "create_sql": "CREATE POLICY p5 ON dump_test.test_table\n" + "\t\t\t\t\t\t FOR DELETE TO regress_dump_test_role USING (true);", + "regexp": _qr( + [ + ("rx", r"^"), + ("lit", "CREATE POLICY p5 ON dump_test.test_table FOR DELETE "), + ("lit", "TO regress_dump_test_role USING (true);"), + ], + XM, + ), + "like": { + **_full(), + **_dts(), + "only_dump_test_table": 1, + "section_post_data": 1, + }, + "unlike": { + "exclude_dump_test_schema": 1, + "exclude_test_table": 1, + "no_policies": 1, + "no_policies_restore": 1, + "only_dump_measurement": 1, + }, + } + tests["CREATE POLICY p6 ON test_table AS RESTRICTIVE"] = { + "create_order": 27, + "create_sql": "CREATE POLICY p6 ON dump_test.test_table AS RESTRICTIVE\n" + "\t\t\t\t\t\t USING (false);", + "regexp": _qr( + [ + ("rx", r"^"), + ("lit", "CREATE POLICY p6 ON dump_test.test_table AS RESTRICTIVE "), + ("lit", "USING (false);"), + ], + XM, + ), + "like": { + **_full(), + **_dts(), + "only_dump_test_table": 1, + "section_post_data": 1, + }, + "unlike": { + "exclude_dump_test_schema": 1, + "exclude_test_table": 1, + "no_policies": 1, + "no_policies_restore": 1, + "only_dump_measurement": 1, + }, + } + tests["CREATE PROPERTY GRAPH propgraph"] = { + "create_order": 20, + "create_sql": "CREATE PROPERTY GRAPH dump_test.propgraph;", + "regexp": _qr( + [ + ("rx", r"^"), + ("lit", "CREATE PROPERTY GRAPH dump_test.propgraph"), + ("rx", r";"), + ], + XM, + ), + "like": {**_full(), **_dts(), "section_pre_data": 
1}, + "unlike": {"exclude_dump_test_schema": 1, "only_dump_measurement": 1}, + } + + tests["CREATE PUBLICATION pub1"] = { + "create_order": 50, + "create_sql": "CREATE PUBLICATION pub1;", + "regexp": _qr( + [ + ("rx", r"^"), + ( + "lit", + "CREATE PUBLICATION pub1 WITH (publish = 'insert, update, delete, truncate');", + ), + ], + XM, + ), + "like": {**_full(), "section_post_data": 1}, + } + tests["CREATE PUBLICATION pub2"] = { + "create_order": 50, + "create_sql": "CREATE PUBLICATION pub2\n" + "\t\t\t\t\t\t FOR ALL TABLES\n" + "\t\t\t\t\t\t WITH (publish = '');", + "regexp": _qr( + [ + ("rx", r"^"), + ("lit", "CREATE PUBLICATION pub2 FOR ALL TABLES WITH (publish = '');"), + ], + XM, + ), + "like": {**_full(), "section_post_data": 1}, + } + tests["CREATE PUBLICATION pub3"] = { + "create_order": 50, + "create_sql": "CREATE PUBLICATION pub3;", + "regexp": _qr( + [ + ("rx", r"^"), + ( + "lit", + "CREATE PUBLICATION pub3 WITH (publish = 'insert, update, delete, truncate');", + ), + ], + XM, + ), + "like": {**_full(), "section_post_data": 1}, + } + tests["CREATE PUBLICATION pub4"] = { + "create_order": 50, + "create_sql": "CREATE PUBLICATION pub4;", + "regexp": _qr( + [ + ("rx", r"^"), + ( + "lit", + "CREATE PUBLICATION pub4 WITH (publish = 'insert, update, delete, truncate');", + ), + ], + XM, + ), + "like": {**_full(), "section_post_data": 1}, + } + tests["CREATE PUBLICATION pub5"] = { + "create_order": 50, + "create_sql": "CREATE PUBLICATION pub5 WITH (publish_generated_columns = stored);", + "regexp": _qr( + [ + ("rx", r"^"), + ( + "lit", + "CREATE PUBLICATION pub5 WITH (publish = 'insert, update, delete, truncate', publish_generated_columns = stored);", + ), + ], + XM, + ), + "like": {**_full(), "section_post_data": 1}, + } + tests["CREATE PUBLICATION pub6"] = { + "create_order": 50, + "create_sql": "CREATE PUBLICATION pub6\n" "\t\t\t\t\t\t FOR ALL SEQUENCES;", + "regexp": _qr( + [ + ("rx", r"^"), + ( + "lit", + "CREATE PUBLICATION pub6 FOR ALL SEQUENCES WITH 
(publish = 'insert, update, delete, truncate');", + ), + ], + XM, + ), + "like": {**_full(), "section_post_data": 1}, + } + tests["CREATE PUBLICATION pub7"] = { + "create_order": 50, + "create_sql": "CREATE PUBLICATION pub7\n" + "\t\t\t\t\t\t FOR ALL SEQUENCES, ALL TABLES\n" + "\t\t\t\t\t\t WITH (publish = '');", + "regexp": _qr( + [ + ("rx", r"^"), + ( + "lit", + "CREATE PUBLICATION pub7 FOR ALL TABLES, ALL SEQUENCES WITH (publish = '');", + ), + ], + XM, + ), + "like": {**_full(), "section_post_data": 1}, + } + tests["CREATE PUBLICATION pub8"] = { + "create_order": 50, + "create_sql": "CREATE PUBLICATION pub8 FOR ALL TABLES EXCEPT (TABLE dump_test.test_table);", + "regexp": _qr( + [ + ("rx", r"^"), + ( + "lit", + "CREATE PUBLICATION pub8 FOR ALL TABLES EXCEPT (TABLE ONLY dump_test.test_table) WITH (publish = 'insert, update, delete, truncate');", + ), + ], + XM, + ), + "like": {**_full(), "section_post_data": 1}, + } + tests["CREATE PUBLICATION pub9"] = { + "create_order": 50, + "create_sql": "CREATE PUBLICATION pub9 FOR ALL TABLES EXCEPT (TABLE dump_test.test_table, dump_test.test_second_table);", + "regexp": _qr( + [ + ("rx", r"^"), + ( + "lit", + "CREATE PUBLICATION pub9 FOR ALL TABLES EXCEPT (TABLE ONLY dump_test.test_table, TABLE ONLY dump_test.test_second_table) WITH (publish = 'insert, update, delete, truncate');", + ), + ], + XM, + ), + "like": {**_full(), "section_post_data": 1}, + } + tests["CREATE PUBLICATION pub10"] = { + "create_order": 92, + "create_sql": "CREATE PUBLICATION pub10 FOR ALL TABLES EXCEPT (TABLE dump_test.test_inheritance_parent);", + "regexp": _qr( + [ + ("rx", r"^"), + ( + "lit", + "CREATE PUBLICATION pub10 FOR ALL TABLES EXCEPT (TABLE ONLY dump_test.test_inheritance_parent, TABLE ONLY dump_test.test_inheritance_child) WITH (publish = 'insert, update, delete, truncate');", + ), + ], + XM, + ), + "like": {**_full(), "section_post_data": 1}, + } + tests["CREATE SUBSCRIPTION sub1"] = { + "create_order": 50, + "create_sql": "CREATE 
SUBSCRIPTION sub1\n" + "\t\t\t\t\t\t CONNECTION 'dbname=doesnotexist' PUBLICATION pub1\n" + "\t\t\t\t\t\t WITH (connect = false);", + "regexp": _qr( + [ + ("rx", r"^"), + ( + "lit", + "CREATE SUBSCRIPTION sub1 CONNECTION 'dbname=doesnotexist' PUBLICATION pub1 WITH (connect = false, slot_name = 'sub1', streaming = parallel);", + ), + ], + XM, + ), + "like": {**_full(), "section_post_data": 1}, + "unlike": { + "no_subscriptions": 1, + "no_subscriptions_restore": 1, + }, + } + tests["CREATE SUBSCRIPTION sub2"] = { + "create_order": 50, + "create_sql": "CREATE SUBSCRIPTION sub2\n" + "\t\t\t\t\t\t CONNECTION 'dbname=doesnotexist' PUBLICATION pub1\n" + "\t\t\t\t\t\t WITH (connect = false, origin = none, streaming = off);", + "regexp": _qr( + [ + ("rx", r"^"), + ( + "lit", + "CREATE SUBSCRIPTION sub2 CONNECTION 'dbname=doesnotexist' PUBLICATION pub1 WITH (connect = false, slot_name = 'sub2', streaming = off, origin = none);", + ), + ], + XM, + ), + "like": {**_full(), "section_post_data": 1}, + "unlike": { + "no_subscriptions": 1, + "no_subscriptions_restore": 1, + }, + } + tests["CREATE SUBSCRIPTION sub3"] = { + "create_order": 50, + "create_sql": "CREATE SUBSCRIPTION sub3\n" + "\t\t\t\t\t\t CONNECTION 'dbname=doesnotexist' PUBLICATION pub1\n" + "\t\t\t\t\t\t WITH (connect = false, origin = any, streaming = on);", + "regexp": _qr( + [ + ("rx", r"^"), + ( + "lit", + "CREATE SUBSCRIPTION sub3 CONNECTION 'dbname=doesnotexist' PUBLICATION pub1 WITH (connect = false, slot_name = 'sub3', streaming = on);", + ), + ], + XM, + ), + "like": {**_full(), "section_post_data": 1}, + "unlike": { + "no_subscriptions": 1, + "no_subscriptions_restore": 1, + }, + } + + tests["ALTER PUBLICATION pub1 ADD TABLE test_table"] = { + "create_order": 51, + "create_sql": "ALTER PUBLICATION pub1 ADD TABLE dump_test.test_table;", + "regexp": _qr( + [ + ("rx", r"^"), + ("lit", "ALTER PUBLICATION pub1 ADD TABLE ONLY dump_test.test_table;"), + ], + XM, + ), + "like": {**_full(), "section_post_data": 1}, 
+ } + tests["ALTER PUBLICATION pub1 ADD TABLE test_second_table"] = { + "create_order": 52, + "create_sql": "ALTER PUBLICATION pub1 ADD TABLE dump_test.test_second_table;", + "regexp": _qr( + [ + ("rx", r"^"), + ( + "lit", + "ALTER PUBLICATION pub1 ADD TABLE ONLY dump_test.test_second_table;", + ), + ], + XM, + ), + "like": {**_full(), "section_post_data": 1}, + } + tests["ALTER PUBLICATION pub1 ADD TABLE test_sixth_table (col3, col2)"] = { + "create_order": 52, + "create_sql": "ALTER PUBLICATION pub1 ADD TABLE dump_test.test_sixth_table (col3, col2);", + "regexp": _qr( + [ + ("rx", r"^"), + ( + "lit", + "ALTER PUBLICATION pub1 ADD TABLE ONLY dump_test.test_sixth_table (col2, col3);", + ), + ], + XM, + ), + "like": {**_full(), "section_post_data": 1}, + } + tests[ + "ALTER PUBLICATION pub1 ADD TABLE test_seventh_table (col3, col2) WHERE (col1 = 1)" + ] = { + "create_order": 52, + "create_sql": "ALTER PUBLICATION pub1 ADD TABLE dump_test.test_seventh_table (col3, col2) WHERE (col1 = 1);", + "regexp": _qr( + [ + ("rx", r"^"), + ( + "lit", + "ALTER PUBLICATION pub1 ADD TABLE ONLY dump_test.test_seventh_table (col2, col3) WHERE ((col1 = 1));", + ), + ], + XM, + ), + "like": {**_full(), "section_post_data": 1}, + } + tests["ALTER PUBLICATION pub3 ADD TABLES IN SCHEMA dump_test"] = { + "create_order": 51, + "create_sql": "ALTER PUBLICATION pub3 ADD TABLES IN SCHEMA dump_test;", + "regexp": _qr( + [ + ("rx", r"^"), + ("lit", "ALTER PUBLICATION pub3 ADD TABLES IN SCHEMA dump_test;"), + ], + XM, + ), + "like": {**_full(), "section_post_data": 1}, + } + tests["ALTER PUBLICATION pub3 ADD TABLES IN SCHEMA public"] = { + "create_order": 52, + "create_sql": "ALTER PUBLICATION pub3 ADD TABLES IN SCHEMA public;", + "regexp": _qr( + [ + ("rx", r"^"), + ("lit", "ALTER PUBLICATION pub3 ADD TABLES IN SCHEMA public;"), + ], + XM, + ), + "like": {**_full(), "section_post_data": 1}, + } + tests["ALTER PUBLICATION pub3 ADD TABLE test_table"] = { + "create_order": 51, + "create_sql": 
"ALTER PUBLICATION pub3 ADD TABLE dump_test.test_table;", + "regexp": _qr( + [ + ("rx", r"^"), + ("lit", "ALTER PUBLICATION pub3 ADD TABLE ONLY dump_test.test_table;"), + ], + XM, + ), + "like": {**_full(), "section_post_data": 1}, + } + tests["ALTER PUBLICATION pub4 ADD TABLE test_table WHERE (col1 > 0);"] = { + "create_order": 51, + "create_sql": "ALTER PUBLICATION pub4 ADD TABLE dump_test.test_table WHERE (col1 > 0);", + "regexp": _qr( + [ + ("rx", r"^"), + ( + "lit", + "ALTER PUBLICATION pub4 ADD TABLE ONLY dump_test.test_table WHERE ((col1 > 0));", + ), + ], + XM, + ), + "like": {**_full(), "section_post_data": 1}, + } + tests[ + "ALTER PUBLICATION pub4 ADD TABLE test_second_table WHERE (col2 = 'test');" + ] = { + "create_order": 52, + "create_sql": "ALTER PUBLICATION pub4 ADD TABLE dump_test.test_second_table WHERE (col2 = 'test');", + "regexp": _qr( + [ + ("rx", r"^"), + ( + "lit", + "ALTER PUBLICATION pub4 ADD TABLE ONLY dump_test.test_second_table WHERE ((col2 = 'test'::text));", + ), + ], + XM, + ), + "like": {**_full(), "section_post_data": 1}, + } + + tests["CREATE SCHEMA public"] = { + "regexp": re.compile(r"^CREATE SCHEMA public;", re.MULTILINE), + "like": {}, + } + tests["CREATE SCHEMA dump_test"] = { + "create_order": 2, + "create_sql": "CREATE SCHEMA dump_test;", + "regexp": re.compile(r"^CREATE SCHEMA dump_test;", re.MULTILINE), + "like": {**_full(), **_dts(), "section_pre_data": 1}, + "unlike": { + "exclude_dump_test_schema": 1, + "only_dump_measurement": 1, + }, + } + tests["CREATE SCHEMA dump_test_second_schema"] = { + "create_order": 9, + "create_sql": "CREATE SCHEMA dump_test_second_schema;", + "regexp": re.compile(r"^CREATE SCHEMA dump_test_second_schema;", re.MULTILINE), + "like": {**_full(), "role": 1, "section_pre_data": 1}, + } + tests["CREATE TABLE test_table"] = { + "create_order": 3, + "create_sql": "CREATE TABLE dump_test.test_table (\n" + "\t\t\t\t\t col1 serial primary key,\n" + "\t\t\t\t\t col2 text COMPRESSION pglz,\n" + 
"\t\t\t\t\t col3 text,\n" + "\t\t\t\t\t col4 text,\n" + "\t\t\t\t\t CHECK (col1 <= 1000)\n" + "\t\t\t\t ) WITH (autovacuum_enabled = false, fillfactor=80);\n" + "\t\t\t\t\t COMMENT ON CONSTRAINT test_table_col1_check\n" + "\t\t\t\t\t\t ON dump_test.test_table IS 'bounds check';", + "regexp": _qr( + [ + ("rx", r"^"), + ("lit", "CREATE TABLE dump_test.test_table ("), + ("rx", r"\n\s+"), + ("lit", "col1 integer NOT NULL,"), + ("rx", r"\n\s+"), + ("lit", "col2 text,"), + ("rx", r"\n\s+"), + ("lit", "col3 text,"), + ("rx", r"\n\s+"), + ("lit", "col4 text,"), + ("rx", r"\n\s+"), + ("lit", "CONSTRAINT test_table_col1_check CHECK ((col1 <= 1000))"), + ("rx", r"\n"), + ("lit", ")"), + ("rx", r"\n"), + ("lit", "WITH (autovacuum_enabled='false', fillfactor='80');"), + ("rx", r"\n(.|\n)*"), + ( + "lit", + "COMMENT ON CONSTRAINT test_table_col1_check ON dump_test.test_table IS 'bounds check';", + ), + ], + XM, + ), + "like": { + **_full(), + **_dts(), + "only_dump_test_table": 1, + "section_pre_data": 1, + }, + "unlike": { + "exclude_dump_test_schema": 1, + "exclude_test_table": 1, + "only_dump_measurement": 1, + }, + } + tests["CREATE TABLE fk_reference_test_table"] = { + "create_order": 21, + "create_sql": "CREATE TABLE dump_test.fk_reference_test_table (\n" + "\t\t\t\t\t col1 int primary key references dump_test.test_table\n" + "\t\t\t\t );", + "regexp": _qr( + [ + ("rx", r"^"), + ("lit", "CREATE TABLE dump_test.fk_reference_test_table ("), + ("rx", r"\n\s+"), + ("lit", "col1 integer NOT NULL"), + ("rx", r"\n\);"), + ], + XM, + ), + "like": {**_full(), **_dts(), "section_pre_data": 1}, + "unlike": { + "exclude_dump_test_schema": 1, + "only_dump_measurement": 1, + }, + } + tests["CREATE TABLE test_second_table"] = { + "create_order": 6, + "create_sql": "CREATE TABLE dump_test.test_second_table (\n" + "\t\t\t\t\t col1 int,\n" + "\t\t\t\t\t col2 text\n" + "\t\t\t\t );", + "regexp": _qr( + [ + ("rx", r"^"), + ("lit", "CREATE TABLE dump_test.test_second_table ("), + ("rx", 
r"\n\s+"), + ("lit", "col1 integer,"), + ("rx", r"\n\s+"), + ("lit", "col2 text"), + ("rx", r"\n\);"), + ], + XM, + ), + "like": {**_full(), **_dts(), "section_pre_data": 1}, + "unlike": { + "exclude_dump_test_schema": 1, + "only_dump_measurement": 1, + }, + } + tests["CREATE TABLE measurement PARTITIONED BY"] = { + "create_order": 90, + "create_sql": "CREATE TABLE dump_test.measurement (\n" + "\t\t\t\t\tcity_id serial not null,\n" + "\t\t\t\t\tlogdate date not null,\n" + "\t\t\t\t\tpeaktemp int CHECK (peaktemp >= -460),\n" + "\t\t\t\t\tunitsales int\n" + "\t\t\t\t ) PARTITION BY RANGE (logdate);", + "regexp": _qr( + [ + ("rx", r"^"), + ("lit", "-- Name: measurement;"), + ("rx", r".*\n"), + ("lit", "--"), + ("rx", r"\n\n"), + ("lit", "CREATE TABLE dump_test.measurement ("), + ("rx", r"\n\s+"), + ("lit", "city_id integer NOT NULL,"), + ("rx", r"\n\s+"), + ("lit", "logdate date NOT NULL,"), + ("rx", r"\n\s+"), + ("lit", "peaktemp integer,"), + ("rx", r"\n\s+"), + ("lit", "unitsales integer,"), + ("rx", r"\n\s+"), + ( + "lit", + "CONSTRAINT measurement_peaktemp_check CHECK ((peaktemp >= '-460'::integer))", + ), + ("rx", r"\n\)\n"), + ("lit", "PARTITION BY RANGE (logdate);"), + ("rx", r"\n"), + ], + XM, + ), + "like": { + **_full(), + **_dts(), + "section_pre_data": 1, + "only_dump_measurement": 1, + }, + "unlike": { + "binary_upgrade": 1, + "exclude_dump_test_schema": 1, + "exclude_measurement": 1, + }, + } + tests["Partition measurement_y2006m2 creation"] = { + "create_order": 91, + "create_sql": "CREATE TABLE dump_test_second_schema.measurement_y2006m2\n" + "\t\t\t\t\t\tPARTITION OF dump_test.measurement (\n" + "\t\t\t\t\t\t\tunitsales DEFAULT 0 CHECK (unitsales >= 0)\n" + "\t\t\t\t\t\t)\n" + "\t\t\t\t\t\tFOR VALUES FROM ('2006-02-01') TO ('2006-03-01');", + "regexp": _qr( + [ + ("rx", r"^"), + ("lit", "CREATE TABLE dump_test_second_schema.measurement_y2006m2 ("), + ("rx", r"\n\s+"), + ( + "lit", + "city_id integer DEFAULT 
nextval('dump_test.measurement_city_id_seq'::regclass) CONSTRAINT measurement_city_id_not_null NOT NULL,", + ), + ("rx", r"\n\s+"), + ( + "lit", + "logdate date CONSTRAINT measurement_logdate_not_null NOT NULL,", + ), + ("rx", r"\n\s+"), + ("lit", "peaktemp integer,"), + ("rx", r"\n\s+"), + ("lit", "unitsales integer DEFAULT 0,"), + ("rx", r"\n\s+"), + ( + "lit", + "CONSTRAINT measurement_peaktemp_check CHECK ((peaktemp >= '-460'::integer)),", + ), + ("rx", r"\n\s+"), + ( + "lit", + "CONSTRAINT measurement_y2006m2_unitsales_check CHECK ((unitsales >= 0))", + ), + ("rx", r"\n\);\n"), + ], + XM, + ), + "like": { + **_full(), + "section_pre_data": 1, + "role": 1, + "binary_upgrade": 1, + "only_dump_measurement": 1, + }, + "unlike": {"exclude_measurement": 1}, + } + tests["Creation of row-level trigger in partitioned table"] = { + "create_order": 92, + "create_sql": "CREATE TRIGGER test_trigger\n" + "\t\t AFTER INSERT ON dump_test.measurement\n" + "\t\t FOR EACH ROW EXECUTE PROCEDURE dump_test.trigger_func()", + "regexp": _qr( + [ + ("rx", r"^"), + ( + "lit", + "CREATE TRIGGER test_trigger AFTER INSERT ON dump_test.measurement ", + ), + ("lit", "FOR EACH ROW "), + ("lit", "EXECUTE FUNCTION dump_test.trigger_func();"), + ], + XM, + ), + "like": { + **_full(), + **_dts(), + "section_post_data": 1, + "only_dump_measurement": 1, + }, + "unlike": { + "exclude_dump_test_schema": 1, + "exclude_measurement": 1, + }, + } + tests["COPY measurement"] = { + "create_order": 93, + "create_sql": "INSERT INTO dump_test.measurement (city_id, logdate, peaktemp, unitsales) " + "VALUES (1, '2006-02-12', 35, 1);", + "regexp": _qr( + [ + ("rx", r"^"), + ( + "lit", + "COPY dump_test_second_schema.measurement_y2006m2 (city_id, logdate, peaktemp, unitsales) FROM stdin;", + ), + ("rx", r"\n(?:1\t2006-02-12\t35\t1\n)\\\.\n"), + ], + XM, + ), + "like": { + **_full(), + **_dts(), + "data_only": 1, + "no_schema": 1, + "only_dump_measurement": 1, + "section_data": 1, + "only_dump_test_schema": 1, + 
"role_parallel": 1, + "role": 1, + }, + "unlike": { + "binary_upgrade": 1, + "schema_only": 1, + "schema_only_with_statistics": 1, + "exclude_measurement": 1, + "only_dump_test_schema": 1, + "test_schema_plus_large_objects": 1, + "exclude_measurement_data": 1, + }, + } + tests["Disabled trigger on partition is altered"] = { + "create_order": 93, + "create_sql": "CREATE TABLE dump_test_second_schema.measurement_y2006m3\n" + "\t\t\t\t\t\tPARTITION OF dump_test.measurement\n" + "\t\t\t\t\t\tFOR VALUES FROM ('2006-03-01') TO ('2006-04-01');\n" + "\t\t\t\t\t\tALTER TABLE dump_test_second_schema.measurement_y2006m3 DISABLE TRIGGER test_trigger;\n" + "\t\t\t\t\t\tCREATE TABLE dump_test_second_schema.measurement_y2006m4\n" + "\t\t\t\t\t\tPARTITION OF dump_test.measurement\n" + "\t\t\t\t\t\tFOR VALUES FROM ('2006-04-01') TO ('2006-05-01');\n" + "\t\t\t\t\t\tALTER TABLE dump_test_second_schema.measurement_y2006m4 ENABLE REPLICA TRIGGER test_trigger;\n" + "\t\t\t\t\t\tCREATE TABLE dump_test_second_schema.measurement_y2006m5\n" + "\t\t\t\t\t\tPARTITION OF dump_test.measurement\n" + "\t\t\t\t\t\tFOR VALUES FROM ('2006-05-01') TO ('2006-06-01');\n" + "\t\t\t\t\t\tALTER TABLE dump_test_second_schema.measurement_y2006m5 ENABLE ALWAYS TRIGGER test_trigger;\n" + "\t\t\t\t\t\t", + "regexp": _qr( + [ + ("rx", r"^"), + ( + "lit", + "ALTER TABLE dump_test_second_schema.measurement_y2006m3 DISABLE TRIGGER test_trigger;", + ), + ], + XM, + ), + "like": { + **_full(), + "section_post_data": 1, + "role": 1, + "binary_upgrade": 1, + "only_dump_measurement": 1, + }, + "unlike": {"exclude_measurement": 1}, + } + tests["Replica trigger on partition is altered"] = { + "regexp": _qr( + [ + ("rx", r"^"), + ( + "lit", + "ALTER TABLE dump_test_second_schema.measurement_y2006m4 ENABLE REPLICA TRIGGER test_trigger;", + ), + ], + XM, + ), + "like": { + **_full(), + "section_post_data": 1, + "role": 1, + "binary_upgrade": 1, + "only_dump_measurement": 1, + }, + "unlike": {"exclude_measurement": 1}, + } 
+ tests["Always trigger on partition is altered"] = { + "regexp": _qr( + [ + ("rx", r"^"), + ( + "lit", + "ALTER TABLE dump_test_second_schema.measurement_y2006m5 ENABLE ALWAYS TRIGGER test_trigger;", + ), + ], + XM, + ), + "like": { + **_full(), + "section_post_data": 1, + "role": 1, + "binary_upgrade": 1, + "only_dump_measurement": 1, + }, + "unlike": {"exclude_measurement": 1}, + } + tests["Disabled trigger on partition is not created"] = { + "regexp": re.compile( + r"CREATE TRIGGER test_trigger.*ON dump_test_second_schema" + ), + "like": {}, + } + tests["Triggers on partitions are not dropped"] = { + "regexp": re.compile(r"DROP TRIGGER test_trigger.*ON dump_test_second_schema"), + "like": {}, + } + + tests["CREATE TABLE test_third_table_generated_cols"] = { + "create_order": 6, + "create_sql": "CREATE TABLE dump_test.test_third_table (\n" + "\t\t\t\t\t\tf1 int, junk int,\n" + "\t\t\t\t\t\tg1 int generated always as (f1 * 2) stored,\n" + '\t\t\t\t\t\t"F3" int,\n' + '\t\t\t\t\t\tg2 int generated always as ("F3" * 3) stored\n' + "\t\t\t\t\t);\n" + "\t\t\t\t\tALTER TABLE dump_test.test_third_table DROP COLUMN junk;", + "regexp": _qr( + [ + ("rx", r"^"), + ("lit", "CREATE TABLE dump_test.test_third_table ("), + ("rx", r"\n\s+"), + ("lit", "f1 integer,"), + ("rx", r"\n\s+"), + ("lit", "g1 integer GENERATED ALWAYS AS ((f1 * 2)) STORED,"), + ("rx", r"\n\s+"), + ("lit", '"F3" integer,'), + ("rx", r"\n\s+"), + ("lit", 'g2 integer GENERATED ALWAYS AS (("F3" * 3)) STORED'), + ("rx", r"\n\);\n"), + ], + XM, + ), + "like": {**_full(), **_dts(), "section_pre_data": 1}, + "unlike": { + "binary_upgrade": 1, + "exclude_dump_test_schema": 1, + "only_dump_measurement": 1, + }, + } + tests["CREATE TABLE test_fourth_table_zero_col"] = { + "create_order": 6, + "create_sql": "CREATE TABLE dump_test.test_fourth_table (\n" "\t\t\t\t\t );", + "regexp": _qr( + [ + ("rx", r"^"), + ("lit", "CREATE TABLE dump_test.test_fourth_table ("), + ("rx", r"\n\);"), + ], + XM, + ), + "like": 
{**_full(), **_dts(), "section_pre_data": 1}, + "unlike": { + "exclude_dump_test_schema": 1, + "only_dump_measurement": 1, + }, + } + tests["CREATE TABLE test_fifth_table"] = { + "create_order": 53, + "create_sql": "CREATE TABLE dump_test.test_fifth_table (\n" + "\t\t\t\t\t\t\tcol1 integer,\n" + "\t\t\t\t\t\t\tcol2 boolean,\n" + "\t\t\t\t\t\t\tcol3 boolean,\n" + "\t\t\t\t\t\t\tcol4 bit(5),\n" + "\t\t\t\t\t\t\tcol5 float8\n" + "\t\t\t\t\t );", + "regexp": _qr( + [ + ("rx", r"^"), + ("lit", "CREATE TABLE dump_test.test_fifth_table ("), + ("rx", r"\n\s+"), + ("lit", "col1 integer,"), + ("rx", r"\n\s+"), + ("lit", "col2 boolean,"), + ("rx", r"\n\s+"), + ("lit", "col3 boolean,"), + ("rx", r"\n\s+"), + ("lit", "col4 bit(5),"), + ("rx", r"\n\s+"), + ("lit", "col5 double precision"), + ("rx", r"\n\);"), + ], + XM, + ), + "like": {**_full(), **_dts(), "section_pre_data": 1}, + "unlike": { + "exclude_dump_test_schema": 1, + "only_dump_measurement": 1, + }, + } + tests["CREATE TABLE test_sixth_table"] = { + "create_order": 6, + "create_sql": "CREATE TABLE dump_test.test_sixth_table (\n" + "\t\t\t\t\t col1 int,\n" + "\t\t\t\t\t col2 text,\n" + "\t\t\t\t\t col3 bytea\n" + "\t\t\t\t );", + "regexp": _qr( + [ + ("rx", r"^"), + ("lit", "CREATE TABLE dump_test.test_sixth_table ("), + ("rx", r"\n\s+"), + ("lit", "col1 integer,"), + ("rx", r"\n\s+"), + ("lit", "col2 text,"), + ("rx", r"\n\s+"), + ("lit", "col3 bytea"), + ("rx", r"\n\);"), + ], + XM, + ), + "like": {**_full(), **_dts(), "section_pre_data": 1}, + "unlike": { + "exclude_dump_test_schema": 1, + "only_dump_measurement": 1, + }, + } + tests["CREATE TABLE test_seventh_table"] = { + "create_order": 6, + "create_sql": "CREATE TABLE dump_test.test_seventh_table (\n" + "\t\t\t\t\t col1 int,\n" + "\t\t\t\t\t col2 text,\n" + "\t\t\t\t\t col3 bytea\n" + "\t\t\t\t );", + "regexp": _qr( + [ + ("rx", r"^"), + ("lit", "CREATE TABLE dump_test.test_seventh_table ("), + ("rx", r"\n\s+"), + ("lit", "col1 integer,"), + ("rx", r"\n\s+"), + 
("lit", "col2 text,"), + ("rx", r"\n\s+"), + ("lit", "col3 bytea"), + ("rx", r"\n\);"), + ], + XM, + ), + "like": {**_full(), **_dts(), "section_pre_data": 1}, + "unlike": { + "exclude_dump_test_schema": 1, + "only_dump_measurement": 1, + }, + } + tests["CREATE TABLE test_table_identity"] = { + "create_order": 3, + "create_sql": "CREATE TABLE dump_test.test_table_identity (\n" + "\t\t\t\t\t col1 int generated always as identity primary key,\n" + "\t\t\t\t\t col2 text\n" + "\t\t\t\t );", + "regexp": _qr( + [ + ("rx", r"^"), + ("lit", "CREATE TABLE dump_test.test_table_identity ("), + ("rx", r"\n\s+"), + ("lit", "col1 integer NOT NULL,"), + ("rx", r"\n\s+"), + ("lit", "col2 text"), + ("rx", r"\n\);.*"), + ( + "lit", + "ALTER TABLE dump_test.test_table_identity ALTER COLUMN col1 ADD GENERATED ALWAYS AS IDENTITY (", + ), + ("rx", r"\n\s+"), + ("lit", "SEQUENCE NAME dump_test.test_table_identity_col1_seq"), + ("rx", r"\n\s+"), + ("lit", "START WITH 1"), + ("rx", r"\n\s+"), + ("lit", "INCREMENT BY 1"), + ("rx", r"\n\s+"), + ("lit", "NO MINVALUE"), + ("rx", r"\n\s+"), + ("lit", "NO MAXVALUE"), + ("rx", r"\n\s+"), + ("lit", "CACHE 1"), + ("rx", r"\n\);"), + ], + XMS, + ), + "like": {**_full(), **_dts(), "section_pre_data": 1}, + "unlike": { + "exclude_dump_test_schema": 1, + "only_dump_measurement": 1, + }, + } + tests["CREATE TABLE test_table_generated"] = { + "create_order": 3, + "create_sql": "CREATE TABLE dump_test.test_table_generated (\n" + "\t\t\t\t\t col1 int primary key,\n" + "\t\t\t\t\t col2 int generated always as (col1 * 2) stored,\n" + "\t\t\t\t\t col3 int generated always as (col1 * 3) virtual\n" + "\t\t\t\t );", + "regexp": _qr( + [ + ("rx", r"^"), + ("lit", "CREATE TABLE dump_test.test_table_generated ("), + ("rx", r"\n\s+"), + ("lit", "col1 integer NOT NULL,"), + ("rx", r"\n\s+"), + ("lit", "col2 integer GENERATED ALWAYS AS ((col1 * 2)) STORED,"), + ("rx", r"\n\s+"), + ("lit", "col3 integer GENERATED ALWAYS AS ((col1 * 3))"), + ("rx", r"\n\);"), + ], + 
XMS, + ), + "like": {**_full(), **_dts(), "section_pre_data": 1}, + "unlike": { + "exclude_dump_test_schema": 1, + "only_dump_measurement": 1, + }, + } + tests["CREATE TABLE test_table_generated_child1 (without local columns)"] = { + "create_order": 4, + "create_sql": "CREATE TABLE dump_test.test_table_generated_child1 ()\n" + "\t\t\t\t\t\t INHERITS (dump_test.test_table_generated);", + "regexp": _qr( + [ + ("rx", r"^"), + ("lit", "CREATE TABLE dump_test.test_table_generated_child1 ("), + ("rx", r"\n\)\n"), + ("lit", "INHERITS (dump_test.test_table_generated);"), + ("rx", r"\n"), + ], + XMS, + ), + "like": {**_full(), **_dts(), "section_pre_data": 1}, + "unlike": { + "binary_upgrade": 1, + "exclude_dump_test_schema": 1, + "only_dump_measurement": 1, + }, + } + tests["ALTER TABLE test_table_generated_child1"] = { + "regexp": _qr( + [ + ("rx", r"^"), + ( + "lit", + "ALTER TABLE ONLY dump_test.test_table_generated_child1 ALTER COLUMN col2 ", + ), + ], + re.MULTILINE, + ), + "like": {}, + } + tests["CREATE TABLE test_table_generated_child2 (with local columns)"] = { + "create_order": 4, + "create_sql": "CREATE TABLE dump_test.test_table_generated_child2 (\n" + "\t\t\t\t\t col1 int,\n" + "\t\t\t\t\t col2 int\n" + "\t\t\t\t\t ) INHERITS (dump_test.test_table_generated);", + "regexp": _qr( + [ + ("rx", r"^"), + ("lit", "CREATE TABLE dump_test.test_table_generated_child2 ("), + ("rx", r"\n\s+"), + ("lit", "col1 integer,"), + ("rx", r"\n\s+"), + ("lit", "col2 integer"), + ("rx", r"\n\)\n"), + ("lit", "INHERITS (dump_test.test_table_generated);"), + ("rx", r"\n"), + ], + XMS, + ), + "like": {**_full(), **_dts(), "section_pre_data": 1}, + "unlike": { + "binary_upgrade": 1, + "exclude_dump_test_schema": 1, + "only_dump_measurement": 1, + }, + } + tests["CREATE TABLE table_with_stats"] = { + "create_order": 98, + "create_sql": "CREATE TABLE dump_test.table_index_stats (\n" + "\t\t\t\t\t col1 int,\n" + "\t\t\t\t\t col2 int,\n" + "\t\t\t\t\t col3 int);\n" + "\t\t\t\t\t CREATE 
INDEX index_with_stats\n" + "\t\t\t\t\t ON dump_test.table_index_stats\n" + "\t\t\t\t\t ((col1 + 1), col1, (col2 + 1), (col3 + 1));\n" + "\t\t\t\t\t ALTER INDEX dump_test.index_with_stats\n" + "\t\t\t\t\t ALTER COLUMN 1 SET STATISTICS 400;\n" + "\t\t\t\t\t ALTER INDEX dump_test.index_with_stats\n" + "\t\t\t\t\t ALTER COLUMN 3 SET STATISTICS 500;", + "regexp": _qr( + [ + ("rx", r"^"), + ( + "lit", + "ALTER INDEX dump_test.index_with_stats ALTER COLUMN 1 SET STATISTICS 400;", + ), + ("rx", r"\n"), + ( + "lit", + "ALTER INDEX dump_test.index_with_stats ALTER COLUMN 3 SET STATISTICS 500;", + ), + ("rx", r"\n"), + ], + XMS, + ), + "like": {**_full(), **_dts(), "section_post_data": 1}, + "unlike": { + "exclude_dump_test_schema": 1, + "only_dump_measurement": 1, + }, + } + tests["CREATE TABLE test_inheritance_parent"] = { + "create_order": 90, + "create_sql": "CREATE TABLE dump_test.test_inheritance_parent (\n" + "\t\t\t\t\t col1 int NOT NULL,\n" + "\t\t\t\t\t col2 int CHECK (col2 >= 42)\n" + "\t\t\t\t\t );", + "regexp": _qr( + [ + ("rx", r"^"), + ("lit", "CREATE TABLE dump_test.test_inheritance_parent ("), + ("rx", r"\n\s+"), + ("lit", "col1 integer NOT NULL,"), + ("rx", r"\n\s+"), + ("lit", "col2 integer,"), + ("rx", r"\n\s+"), + ( + "lit", + "CONSTRAINT test_inheritance_parent_col2_check CHECK ((col2 >= 42))", + ), + ("rx", r"\n"), + ("lit", ");"), + ("rx", r"\n"), + ], + XM, + ), + "like": {**_full(), **_dts(), "section_pre_data": 1}, + "unlike": { + "exclude_dump_test_schema": 1, + "only_dump_measurement": 1, + }, + } + tests["CREATE TABLE test_inheritance_child"] = { + "create_order": 91, + "create_sql": "CREATE TABLE dump_test.test_inheritance_child (\n" + "\t\t\t\t\t col1 int NOT NULL,\n" + "\t\t\t\t\t CONSTRAINT test_inheritance_child CHECK (col2 >= 142857)\n" + "\t\t\t\t\t) INHERITS (dump_test.test_inheritance_parent);", + "regexp": _qr( + [ + ("rx", r"^"), + ("lit", "CREATE TABLE dump_test.test_inheritance_child ("), + ("rx", r"\n\s+"), + ("lit", "col1 integer 
NOT NULL,"), + ("rx", r"\n\s+"), + ("lit", "CONSTRAINT test_inheritance_child CHECK ((col2 >= 142857))"), + ("rx", r"\n\)\n"), + ("lit", "INHERITS (dump_test.test_inheritance_parent);"), + ("rx", r"\n"), + ], + XM, + ), + "like": {**_full(), **_dts(), "section_pre_data": 1}, + "unlike": { + "binary_upgrade": 1, + "exclude_dump_test_schema": 1, + "only_dump_measurement": 1, + }, + } + + tests["CREATE STATISTICS extended_stats_no_options"] = { + "create_order": 97, + "create_sql": "CREATE STATISTICS dump_test.test_ext_stats_no_options\n" + "\t\t\t\t\t\tON col1, col2 FROM dump_test.test_table", + "regexp": _qr( + [ + ("rx", r"^"), + ( + "lit", + "CREATE STATISTICS dump_test.test_ext_stats_no_options ON col1, col2 FROM dump_test.test_table;", + ), + ], + XMS, + ), + "like": {**_full(), **_dts(), "section_post_data": 1}, + "unlike": { + "exclude_dump_test_schema": 1, + "exclude_test_table": 1, + "only_dump_measurement": 1, + }, + } + tests["CREATE STATISTICS extended_stats_options"] = { + "create_order": 97, + "create_sql": "CREATE STATISTICS dump_test.test_ext_stats_opts\n" + "\t\t\t\t\t\t(ndistinct) ON col1, col2 FROM dump_test.test_fifth_table", + "regexp": _qr( + [ + ("rx", r"^"), + ( + "lit", + "CREATE STATISTICS dump_test.test_ext_stats_opts (ndistinct) ON col1, col2 FROM dump_test.test_fifth_table;", + ), + ], + XMS, + ), + "like": {**_full(), **_dts(), "section_post_data": 1}, + "unlike": { + "exclude_dump_test_schema": 1, + "only_dump_measurement": 1, + }, + } + tests["ALTER STATISTICS extended_stats_options"] = { + "create_order": 98, + "create_sql": "ALTER STATISTICS dump_test.test_ext_stats_opts SET STATISTICS 1000", + "regexp": _qr( + [ + ("rx", r"^"), + ( + "lit", + "ALTER STATISTICS dump_test.test_ext_stats_opts SET STATISTICS 1000;", + ), + ], + XMS, + ), + "like": {**_full(), **_dts(), "section_post_data": 1}, + "unlike": { + "exclude_dump_test_schema": 1, + "only_dump_measurement": 1, + }, + } + tests["CREATE STATISTICS extended_stats_expression"] = { 
+ "create_order": 99, + "create_sql": "CREATE STATISTICS dump_test.test_ext_stats_expr\n" + "\t\t\t\t\t\tON (2 * col1) FROM dump_test.test_fifth_table", + "regexp": _qr( + [ + ("rx", r"^"), + ( + "lit", + "CREATE STATISTICS dump_test.test_ext_stats_expr ON (2 * col1) FROM dump_test.test_fifth_table;", + ), + ], + XMS, + ), + "like": {**_full(), **_dts(), "section_post_data": 1}, + "unlike": { + "exclude_dump_test_schema": 1, + "only_dump_measurement": 1, + }, + } + tests["CREATE SEQUENCE test_table_col1_seq"] = { + "regexp": _qr( + [ + ("rx", r"^"), + ("lit", "CREATE SEQUENCE dump_test.test_table_col1_seq"), + ("rx", r"\n\s+"), + ("lit", "AS integer"), + ("rx", r"\n\s+"), + ("lit", "START WITH 1"), + ("rx", r"\n\s+"), + ("lit", "INCREMENT BY 1"), + ("rx", r"\n\s+"), + ("lit", "NO MINVALUE"), + ("rx", r"\n\s+"), + ("lit", "NO MAXVALUE"), + ("rx", r"\n\s+"), + ("lit", "CACHE 1;"), + ], + XM, + ), + "like": { + **_full(), + **_dts(), + "only_dump_test_table": 1, + "section_pre_data": 1, + }, + "unlike": { + "exclude_dump_test_schema": 1, + "only_dump_measurement": 1, + }, + } + tests["CREATE INDEX ON ONLY measurement"] = { + "create_order": 92, + "create_sql": "CREATE INDEX ON dump_test.measurement (city_id, logdate);", + "regexp": _qr( + [ + ("rx", r"^"), + ( + "lit", + "CREATE INDEX measurement_city_id_logdate_idx ON ONLY dump_test.measurement USING", + ), + ], + XM, + ), + "like": {**_full(), **_dts(), "section_post_data": 1}, + "unlike": { + "exclude_dump_test_schema": 1, + "exclude_measurement": 1, + }, + } + tests["ALTER TABLE measurement PRIMARY KEY"] = { + "catch_all": "CREATE ... 
commands", + "create_order": 93, + "create_sql": "ALTER TABLE dump_test.measurement ADD PRIMARY KEY (city_id, logdate);", + "regexp": _qr( + [ + ("rx", r"^"), + ("lit", "ALTER TABLE ONLY dump_test.measurement"), + ("rx", r" \n^\s+"), + ( + "lit", + "ADD CONSTRAINT measurement_pkey PRIMARY KEY (city_id, logdate);", + ), + ], + XM, + ), + "like": { + **_full(), + **_dts(), + "section_post_data": 1, + "only_dump_measurement": 1, + }, + "unlike": { + "exclude_dump_test_schema": 1, + "exclude_measurement": 1, + }, + } + tests["CREATE INDEX ... ON measurement_y2006_m2"] = { + "regexp": _qr( + [ + ("rx", r"^"), + ( + "lit", + "CREATE INDEX measurement_y2006m2_city_id_logdate_idx ON dump_test_second_schema.measurement_y2006m2 ", + ), + ], + XM, + ), + "like": { + **_full(), + "role": 1, + "section_post_data": 1, + "only_dump_measurement": 1, + }, + "unlike": {"exclude_measurement": 1}, + } + tests["ALTER INDEX ... ATTACH PARTITION"] = { + "regexp": _qr( + [ + ("rx", r"^"), + ( + "lit", + "ALTER INDEX dump_test.measurement_city_id_logdate_idx ATTACH PARTITION dump_test_second_schema.measurement_y2006m2_city_id_logdate_idx", + ), + ], + XM, + ), + "like": { + **_full(), + "role": 1, + "section_post_data": 1, + "only_dump_measurement": 1, + }, + "unlike": {"exclude_measurement": 1}, + } + tests["ALTER INDEX ... ATTACH PARTITION (primary key)"] = { + "catch_all": "CREATE ... 
commands", + "regexp": _qr( + [ + ("rx", r"^"), + ( + "lit", + "ALTER INDEX dump_test.measurement_pkey ATTACH PARTITION dump_test_second_schema.measurement_y2006m2_pkey", + ), + ], + XM, + ), + "like": { + **_full(), + "role": 1, + "section_post_data": 1, + "only_dump_measurement": 1, + }, + "unlike": {"exclude_measurement": 1}, + } + tests["CREATE VIEW test_view"] = { + "create_order": 61, + "create_sql": "CREATE VIEW dump_test.test_view\n" + "\t\t WITH (check_option = 'local', security_barrier = true) AS\n" + "\t\t SELECT col1 FROM dump_test.test_table;", + "regexp": _qr( + [ + ("rx", r"^"), + ( + "lit", + "CREATE VIEW dump_test.test_view WITH (security_barrier='true') AS", + ), + ("rx", r"\n\s+"), + ("lit", "SELECT col1"), + ("rx", r"\n\s+"), + ("lit", "FROM dump_test.test_table"), + ("rx", r"\n\s+"), + ("lit", "WITH LOCAL CHECK OPTION;"), + ], + XM, + ), + "like": {**_full(), **_dts(), "section_pre_data": 1}, + "unlike": { + "exclude_dump_test_schema": 1, + "only_dump_measurement": 1, + }, + } + tests["ALTER VIEW test_view SET DEFAULT"] = { + "create_order": 62, + "create_sql": "ALTER VIEW dump_test.test_view ALTER COLUMN col1 SET DEFAULT 1;", + "regexp": _qr( + [ + ("rx", r"^"), + ( + "lit", + "ALTER TABLE ONLY dump_test.test_view ALTER COLUMN col1 SET DEFAULT 1;", + ), + ], + XM, + ), + "like": {**_full(), **_dts(), "section_pre_data": 1}, + "unlike": { + "exclude_dump_test_schema": 1, + "only_dump_measurement": 1, + }, + } + + tests["DROP SCHEMA public (for testing without public schema)"] = { + "database": "regress_pg_dump_test", + "create_order": 100, + "create_sql": "DROP SCHEMA public;", + "regexp": re.compile(r"^DROP SCHEMA public;", re.MULTILINE), + "like": {}, + } + tests["DROP SCHEMA public"] = { + "regexp": re.compile(r"^DROP SCHEMA public;", re.MULTILINE), + "like": {}, + } + tests["DROP SCHEMA IF EXISTS public"] = { + "regexp": re.compile(r"^DROP SCHEMA IF EXISTS public;", re.MULTILINE), + "like": {}, + } + tests["DROP EXTENSION plpgsql"] = { + 
"regexp": re.compile(r"^DROP EXTENSION plpgsql;", re.MULTILINE), + "like": {}, + } + tests["DROP FUNCTION dump_test.pltestlang_call_handler()"] = { + "regexp": re.compile( + r"^DROP FUNCTION dump_test\.pltestlang_call_handler\(\);", re.MULTILINE + ), + "like": {"clean": 1}, + } + tests["DROP LANGUAGE pltestlang"] = { + "regexp": re.compile(r"^DROP PROCEDURAL LANGUAGE pltestlang;", re.MULTILINE), + "like": {"clean": 1}, + } + tests["DROP SCHEMA dump_test"] = { + "regexp": re.compile(r"^DROP SCHEMA dump_test;", re.MULTILINE), + "like": {"clean": 1}, + } + tests["DROP SCHEMA dump_test_second_schema"] = { + "regexp": re.compile(r"^DROP SCHEMA dump_test_second_schema;", re.MULTILINE), + "like": {"clean": 1}, + } + tests["DROP TABLE test_table"] = { + "regexp": re.compile(r"^DROP TABLE dump_test\.test_table;", re.MULTILINE), + "like": {"clean": 1}, + } + tests["DROP TABLE fk_reference_test_table"] = { + "regexp": re.compile( + r"^DROP TABLE dump_test\.fk_reference_test_table;", re.MULTILINE + ), + "like": {"clean": 1}, + } + tests["DROP TABLE test_second_table"] = { + "regexp": re.compile( + r"^DROP TABLE dump_test\.test_second_table;", re.MULTILINE + ), + "like": {"clean": 1}, + } + tests["DROP EXTENSION IF EXISTS plpgsql"] = { + "regexp": re.compile(r"^DROP EXTENSION IF EXISTS plpgsql;", re.MULTILINE), + "like": {}, + } + tests["DROP FUNCTION IF EXISTS dump_test.pltestlang_call_handler()"] = { + "regexp": _qr( + [ + ("rx", r"^"), + ("lit", "DROP FUNCTION IF EXISTS dump_test.pltestlang_call_handler();"), + ], + XM, + ), + "like": {"clean_if_exists": 1}, + } + tests["DROP LANGUAGE IF EXISTS pltestlang"] = { + "regexp": re.compile( + r"^DROP PROCEDURAL LANGUAGE IF EXISTS pltestlang;", re.MULTILINE + ), + "like": {"clean_if_exists": 1}, + } + tests["DROP SCHEMA IF EXISTS dump_test"] = { + "regexp": re.compile(r"^DROP SCHEMA IF EXISTS dump_test;", re.MULTILINE), + "like": {"clean_if_exists": 1}, + } + tests["DROP SCHEMA IF EXISTS dump_test_second_schema"] = { + "regexp": 
re.compile( + r"^DROP SCHEMA IF EXISTS dump_test_second_schema;", re.MULTILINE + ), + "like": {"clean_if_exists": 1}, + } + tests["DROP TABLE IF EXISTS test_table"] = { + "regexp": re.compile( + r"^DROP TABLE IF EXISTS dump_test\.test_table;", re.MULTILINE + ), + "like": {"clean_if_exists": 1}, + } + tests["DROP TABLE IF EXISTS test_second_table"] = { + "regexp": re.compile( + r"^DROP TABLE IF EXISTS dump_test\.test_second_table;", re.MULTILINE + ), + "like": {"clean_if_exists": 1}, + } + tests["DROP ROLE regress_dump_test_role"] = { + "regexp": _qr([("rx", r"^"), ("lit", "DROP ROLE regress_dump_test_role;")], XM), + "like": {"pg_dumpall_globals_clean": 1}, + } + tests["DROP ROLE pg_"] = { + "regexp": _qr([("rx", r"^"), ("lit", "DROP ROLE pg_"), ("rx", r".+;")], XM), + "like": {}, + } + tests["GRANT USAGE ON SCHEMA dump_test_second_schema"] = { + "create_order": 10, + "create_sql": "GRANT USAGE ON SCHEMA dump_test_second_schema\n" + "\t\t\t\t\t\t TO regress_dump_test_role;", + "regexp": _qr( + [ + ("rx", r"^"), + ( + "lit", + "GRANT USAGE ON SCHEMA dump_test_second_schema TO regress_dump_test_role;", + ), + ], + XM, + ), + "like": {**_full(), "role": 1, "section_pre_data": 1}, + "unlike": {"no_privs": 1}, + } + tests["GRANT USAGE ON FOREIGN DATA WRAPPER dummy"] = { + "create_order": 85, + "create_sql": "GRANT USAGE ON FOREIGN DATA WRAPPER dummy\n" + "\t\t\t\t\t\t TO regress_dump_test_role;", + "regexp": _qr( + [ + ("rx", r"^"), + ( + "lit", + "GRANT ALL ON FOREIGN DATA WRAPPER dummy TO regress_dump_test_role;", + ), + ], + XM, + ), + "like": {**_full(), "section_pre_data": 1}, + "unlike": {"no_privs": 1}, + } + tests["GRANT USAGE ON FOREIGN SERVER s1"] = { + "create_order": 85, + "create_sql": "GRANT USAGE ON FOREIGN SERVER s1\n" + "\t\t\t\t\t\t TO regress_dump_test_role;", + "regexp": _qr( + [ + ("rx", r"^"), + ("lit", "GRANT ALL ON FOREIGN SERVER s1 TO regress_dump_test_role;"), + ], + XM, + ), + "like": {**_full(), "section_pre_data": 1}, + "unlike": 
{"no_privs": 1}, + } + tests["GRANT USAGE ON DOMAIN dump_test.us_postal_code"] = { + "create_order": 72, + "create_sql": "GRANT USAGE ON DOMAIN dump_test.us_postal_code TO regress_dump_test_role;", + "regexp": _qr( + [ + ("rx", r"^"), + ( + "lit", + "GRANT ALL ON TYPE dump_test.us_postal_code TO regress_dump_test_role;", + ), + ], + XM, + ), + "like": {**_full(), **_dts(), "section_pre_data": 1}, + "unlike": { + "exclude_dump_test_schema": 1, + "no_privs": 1, + "only_dump_measurement": 1, + }, + } + tests["GRANT USAGE ON TYPE dump_test.int42"] = { + "create_order": 87, + "create_sql": "GRANT USAGE ON TYPE dump_test.int42 TO regress_dump_test_role;", + "regexp": _qr( + [ + ("rx", r"^"), + ("lit", "GRANT ALL ON TYPE dump_test.int42 TO regress_dump_test_role;"), + ], + XM, + ), + "like": {**_full(), **_dts(), "section_pre_data": 1}, + "unlike": { + "exclude_dump_test_schema": 1, + "no_privs": 1, + "only_dump_measurement": 1, + }, + } + tests["GRANT USAGE ON TYPE dump_test.planets - ENUM"] = { + "create_order": 66, + "create_sql": "GRANT USAGE ON TYPE dump_test.planets TO regress_dump_test_role;", + "regexp": _qr( + [ + ("rx", r"^"), + ( + "lit", + "GRANT ALL ON TYPE dump_test.planets TO regress_dump_test_role;", + ), + ], + XM, + ), + "like": {**_full(), **_dts(), "section_pre_data": 1}, + "unlike": { + "exclude_dump_test_schema": 1, + "no_privs": 1, + "only_dump_measurement": 1, + }, + } + tests["GRANT USAGE ON TYPE dump_test.textrange - RANGE"] = { + "create_order": 67, + "create_sql": "GRANT USAGE ON TYPE dump_test.textrange TO regress_dump_test_role;", + "regexp": _qr( + [ + ("rx", r"^"), + ( + "lit", + "GRANT ALL ON TYPE dump_test.textrange TO regress_dump_test_role;", + ), + ], + XM, + ), + "like": {**_full(), **_dts(), "section_pre_data": 1}, + "unlike": { + "exclude_dump_test_schema": 1, + "no_privs": 1, + "only_dump_measurement": 1, + }, + } + tests["GRANT CREATE ON DATABASE dump_test"] = { + "create_order": 48, + "create_sql": "GRANT CREATE ON DATABASE 
dump_test TO regress_dump_test_role;", + "regexp": _qr( + [ + ("rx", r"^"), + ( + "lit", + "GRANT CREATE ON DATABASE dump_test TO regress_dump_test_role;", + ), + ], + XM, + ), + "like": {"pg_dumpall_dbprivs": 1}, + } + tests["GRANT SELECT ON TABLE test_table"] = { + "create_order": 5, + "create_sql": "GRANT SELECT ON TABLE dump_test.test_table\n" + "\t\t\t\t\t\t TO regress_dump_test_role;", + "regexp": _qr( + [ + ("rx", r"^"), + ( + "lit", + "GRANT SELECT ON TABLE dump_test.test_table TO regress_dump_test_role;", + ), + ], + re.MULTILINE, + ), + "like": { + **_full(), + **_dts(), + "only_dump_test_table": 1, + "section_pre_data": 1, + }, + "unlike": { + "exclude_dump_test_schema": 1, + "exclude_test_table": 1, + "no_privs": 1, + "only_dump_measurement": 1, + }, + } + tests["GRANT SELECT ON TABLE measurement"] = { + "create_order": 91, + "create_sql": "GRANT SELECT ON TABLE dump_test.measurement\n" + "\t\t\t\t\t\t TO regress_dump_test_role;\n" + "\t\t\t\t\t GRANT SELECT(city_id) ON TABLE dump_test.measurement\n" + '\t\t\t\t\t\t TO "regress_quoted \\"" role";', + "regexp": _qr( + [ + ("rx", r"^"), + ( + "lit", + "GRANT SELECT ON TABLE dump_test.measurement TO regress_dump_test_role;", + ), + ("rx", r"\n.*^"), + ( + "lit", + 'GRANT SELECT(city_id) ON TABLE dump_test.measurement TO "regress_quoted \\"" role";', + ), + ], + XMS, + ), + "like": { + **_full(), + **_dts(), + "section_pre_data": 1, + "only_dump_measurement": 1, + }, + "unlike": { + "exclude_dump_test_schema": 1, + "no_privs": 1, + "exclude_measurement": 1, + }, + } + tests["GRANT SELECT ON TABLE measurement_y2006m2"] = { + "create_order": 94, + "create_sql": "GRANT SELECT ON TABLE\n" + "\t\t\t\t\t\t dump_test_second_schema.measurement_y2006m2,\n" + "\t\t\t\t\t\t dump_test_second_schema.measurement_y2006m3,\n" + "\t\t\t\t\t\t dump_test_second_schema.measurement_y2006m4,\n" + "\t\t\t\t\t\t dump_test_second_schema.measurement_y2006m5\n" + "\t\t\t\t\t\t TO regress_dump_test_role;", + "regexp": _qr( + [ + 
("rx", r"^"), + ( + "lit", + "GRANT SELECT ON TABLE dump_test_second_schema.measurement_y2006m2 TO regress_dump_test_role;", + ), + ], + re.MULTILINE, + ), + "like": { + **_full(), + "role": 1, + "section_pre_data": 1, + "only_dump_measurement": 1, + }, + "unlike": { + "no_privs": 1, + "exclude_measurement": 1, + }, + } + + tests["GRANT ALL ON LARGE OBJECT ..."] = { + "create_order": 60, + "create_sql": "DO $$\n" + "\t\t\t\t\t\t DECLARE myoid oid;\n" + "\t\t\t\t\t\t BEGIN\n" + "\t\t\t\t\t\t\tSELECT loid FROM pg_largeobject INTO myoid;\n" + "\t\t\t\t\t\t\tEXECUTE 'GRANT ALL ON LARGE OBJECT ' || myoid || ' TO regress_dump_test_role;';\n" + "\t\t\t\t\t\t END;\n" + "\t\t\t\t\t\t $$;", + "regexp": _qr( + [ + ("rx", r"^"), + ("lit", "GRANT ALL ON LARGE OBJECT "), + ("rx", r"[0-9]+"), + ("lit", " TO regress_dump_test_role;"), + ], + XM, + ), + "like": { + **_full(), + "column_inserts": 1, + "data_only": 1, + "inserts": 1, + "no_schema": 1, + "section_data": 1, + "test_schema_plus_large_objects": 1, + }, + "unlike": { + "binary_upgrade": 1, + "no_large_objects": 1, + "no_privs": 1, + "schema_only": 1, + "schema_only_with_statistics": 1, + }, + } + tests["GRANT INSERT(col1) ON TABLE test_second_table"] = { + "create_order": 8, + "create_sql": "GRANT INSERT (col1) ON TABLE dump_test.test_second_table\n" + "\t\t\t\t\t\t TO regress_dump_test_role;", + "regexp": _qr( + [ + ("rx", r"^"), + ( + "lit", + "GRANT INSERT(col1) ON TABLE dump_test.test_second_table TO regress_dump_test_role;", + ), + ], + XM, + ), + "like": {**_full(), **_dts(), "section_pre_data": 1}, + "unlike": { + "exclude_dump_test_schema": 1, + "no_privs": 1, + "only_dump_measurement": 1, + }, + } + tests["GRANT SELECT ON PROPERTY GRAPH propgraph"] = { + "create_order": 21, + "create_sql": "GRANT SELECT ON PROPERTY GRAPH dump_test.propgraph TO regress_dump_test_role;", + "regexp": _qr( + [ + ("rx", r"^"), + ( + "lit", + "GRANT ALL ON PROPERTY GRAPH dump_test.propgraph TO regress_dump_test_role;", + ), + ], + XM, 
+ ), + "like": {**_full(), **_dts(), "section_pre_data": 1}, + "unlike": { + "exclude_dump_test_schema": 1, + "no_privs": 1, + "only_dump_measurement": 1, + }, + } + tests["GRANT EXECUTE ON FUNCTION pg_sleep() TO regress_dump_test_role"] = { + "create_order": 16, + "create_sql": "GRANT EXECUTE ON FUNCTION pg_sleep(float8)\n" + "\t\t\t\t\t\t TO regress_dump_test_role;", + "regexp": _qr( + [ + ("rx", r"^"), + ( + "lit", + "GRANT ALL ON FUNCTION pg_catalog.pg_sleep(double precision) TO regress_dump_test_role;", + ), + ], + XM, + ), + "like": {**_full(), "section_pre_data": 1}, + "unlike": {"no_privs": 1}, + } + _proc_cols = [ + "tableoid", + "oid", + "proname", + "pronamespace", + "proowner", + "prolang", + "procost", + "prorows", + "provariadic", + "prosupport", + "prokind", + "prosecdef", + "proleakproof", + "proisstrict", + "proretset", + "provolatile", + "proparallel", + "pronargs", + "pronargdefaults", + "prorettype", + "proargtypes", + "proallargtypes", + "proargmodes", + "proargnames", + "proargdefaults", + "protrftypes", + "prosrc", + "probin", + "proconfig", + "proacl", + ] + _proc_create_cols = ",\n\t\t\t\t\t\t ".join(_proc_cols) + _proc_parts: List[_Segment] = [] + for _i, _col in enumerate(_proc_cols): + if _i > 0: + _proc_parts.append(("rx", r"\n.*")) + _proc_parts.append( + ("lit", f"GRANT SELECT({_col}) ON TABLE pg_catalog.pg_proc TO PUBLIC;") + ) + tests["GRANT SELECT (proname ...) 
ON TABLE pg_proc TO public"] = { + "create_order": 46, + "create_sql": "GRANT SELECT (\n" + "\t\t\t\t\t\t " + _proc_create_cols + "\n" + "\t\t\t\t\t\t) ON TABLE pg_proc TO public;", + "regexp": _qr(_proc_parts, XMS), + "like": {**_full(), "section_pre_data": 1}, + "unlike": {"no_privs": 1}, + } + tests["GRANT USAGE ON SCHEMA public TO public"] = { + "regexp": _qr( + [ + ("rx", r"^"), + ("lit", "--"), + ("rx", r"\n\n"), + ("lit", "GRANT USAGE ON SCHEMA public TO PUBLIC;"), + ], + XM, + ), + "like": {}, + } + tests["REFRESH MATERIALIZED VIEW matview"] = { + "regexp": _qr( + [("rx", r"^"), ("lit", "REFRESH MATERIALIZED VIEW dump_test.matview;")], + re.MULTILINE, + ), + "like": {**_full(), **_dts(), "section_post_data": 1}, + "unlike": { + "binary_upgrade": 1, + "exclude_dump_test_schema": 1, + "schema_only": 1, + "schema_only_with_statistics": 1, + "only_dump_measurement": 1, + }, + } + tests["REFRESH MATERIALIZED VIEW matview_second"] = { + "regexp": _qr( + [ + ("rx", r"^"), + ("lit", "REFRESH MATERIALIZED VIEW dump_test.matview;"), + ("rx", r"\n.*"), + ("lit", "REFRESH MATERIALIZED VIEW dump_test.matview_second;"), + ], + XMS, + ), + "like": {**_full(), **_dts(), "section_post_data": 1}, + "unlike": { + "binary_upgrade": 1, + "exclude_dump_test_schema": 1, + "schema_only": 1, + "schema_only_with_statistics": 1, + "only_dump_measurement": 1, + }, + } + tests["REFRESH MATERIALIZED VIEW matview_third"] = { + "regexp": _qr( + [ + ("rx", r"^"), + ("lit", "REFRESH MATERIALIZED VIEW dump_test.matview_third;"), + ], + XMS, + ), + "like": {}, + } + tests["REFRESH MATERIALIZED VIEW matview_fourth"] = { + "regexp": _qr( + [ + ("rx", r"^"), + ("lit", "REFRESH MATERIALIZED VIEW dump_test.matview_fourth;"), + ], + XMS, + ), + "like": {}, + } + tests["REVOKE CONNECT ON DATABASE dump_test FROM public"] = { + "create_order": 49, + "create_sql": "REVOKE CONNECT ON DATABASE dump_test FROM public;", + "regexp": _qr( + [ + ("rx", r"^"), + ("lit", "REVOKE CONNECT,TEMPORARY ON DATABASE 
dump_test FROM PUBLIC;"), + ("rx", r"\n"), + ("lit", "GRANT TEMPORARY ON DATABASE dump_test TO PUBLIC;"), + ("rx", r"\n"), + ( + "lit", + "GRANT CREATE ON DATABASE dump_test TO regress_dump_test_role;", + ), + ], + XM, + ), + "like": {"pg_dumpall_dbprivs": 1}, + } + tests["REVOKE EXECUTE ON FUNCTION pg_sleep() FROM public"] = { + "create_order": 15, + "create_sql": "REVOKE EXECUTE ON FUNCTION pg_sleep(float8)\n" + "\t\t\t\t\t\t FROM public;", + "regexp": _qr( + [ + ("rx", r"^"), + ( + "lit", + "REVOKE ALL ON FUNCTION pg_catalog.pg_sleep(double precision) FROM PUBLIC;", + ), + ], + XM, + ), + "like": {**_full(), "section_pre_data": 1}, + "unlike": {"no_privs": 1}, + } + tests["REVOKE EXECUTE ON FUNCTION pg_stat_reset FROM regress_dump_test_role"] = { + "create_order": 15, + "create_sql": "\n" + "\t\t\tALTER FUNCTION pg_stat_reset OWNER TO regress_dump_test_role;\n" + "\t\t\tREVOKE EXECUTE ON FUNCTION pg_stat_reset\n" + "\t\t\t FROM regress_dump_test_role;", + "regexp": re.compile( + r"^[^-].*pg_stat_reset.* regress_dump_test_role", re.MULTILINE + ), + "like": {}, + } + tests["REVOKE SELECT ON TABLE pg_proc FROM public"] = { + "create_order": 45, + "create_sql": "REVOKE SELECT ON TABLE pg_proc FROM public;", + "regexp": _qr( + [ + ("rx", r"^"), + ("lit", "REVOKE SELECT ON TABLE pg_catalog.pg_proc FROM PUBLIC;"), + ], + re.MULTILINE, + ), + "like": {**_full(), "section_pre_data": 1}, + "unlike": {"no_privs": 1}, + } + tests["REVOKE ALL ON SCHEMA public"] = { + "create_order": 16, + "create_sql": 'REVOKE ALL ON SCHEMA public FROM "regress_quoted \\"" role";', + "regexp": re.compile( + r'^REVOKE ALL ON SCHEMA public FROM "regress_quoted \\"" role";', + re.MULTILINE, + ), + "like": {**_full(), "section_pre_data": 1}, + "unlike": {"no_privs": 1}, + } + tests["REVOKE USAGE ON LANGUAGE plpgsql FROM public"] = { + "create_order": 16, + "create_sql": "REVOKE USAGE ON LANGUAGE plpgsql FROM public;", + "regexp": re.compile( + r"^REVOKE ALL ON LANGUAGE plpgsql FROM PUBLIC;", 
re.MULTILINE + ), + "like": { + **_full(), + **_dts(), + "only_dump_test_table": 1, + "role": 1, + "section_pre_data": 1, + "only_dump_measurement": 1, + }, + "unlike": {"no_privs": 1}, + } + + tests["CREATE ACCESS METHOD regress_test_table_am"] = { + "create_order": 11, + "create_sql": "CREATE ACCESS METHOD regress_table_am TYPE TABLE HANDLER heap_tableam_handler;", + "regexp": _qr( + [ + ("rx", r"^"), + ( + "lit", + "CREATE ACCESS METHOD regress_table_am TYPE TABLE HANDLER heap_tableam_handler;", + ), + ("rx", r"\n"), + ], + XM, + ), + "like": {**_full(), "section_pre_data": 1}, + } + tests["CREATE TABLE regress_pg_dump_table_am"] = { + "create_order": 12, + "create_sql": "\n" + "\t\t\tCREATE TABLE dump_test.regress_pg_dump_table_am_0() USING heap;\n" + "\t\t\tCREATE TABLE dump_test.regress_pg_dump_table_am_1 (col1 int) USING regress_table_am;\n" + "\t\t\tCREATE TABLE dump_test.regress_pg_dump_table_am_2() USING heap;", + "regexp": _qr( + [ + ("rx", r"^"), + ("lit", "SET default_table_access_method = regress_table_am;"), + ("rx", r"(\n(?!SET[^;]+;)[^\n]*)*"), + ("rx", r"\n"), + ("lit", "CREATE TABLE dump_test.regress_pg_dump_table_am_1 ("), + ("rx", r"\n\s+"), + ("lit", "col1 integer"), + ("rx", r"\n\);"), + ], + XM, + ), + "like": {**_full(), **_dts(), "section_pre_data": 1}, + "unlike": { + "exclude_dump_test_schema": 1, + "no_table_access_method": 1, + "only_dump_measurement": 1, + }, + } + tests["CREATE MATERIALIZED VIEW regress_pg_dump_matview_am"] = { + "create_order": 13, + "create_sql": "\n" + "\t\t\tCREATE MATERIALIZED VIEW dump_test.regress_pg_dump_matview_am_0 USING heap AS SELECT 1;\n" + "\t\t\tCREATE MATERIALIZED VIEW dump_test.regress_pg_dump_matview_am_1\n" + "\t\t\t\tUSING regress_table_am AS SELECT count(*) FROM pg_class;\n" + "\t\t\tCREATE MATERIALIZED VIEW dump_test.regress_pg_dump_matview_am_2 USING heap AS SELECT 1;", + "regexp": _qr( + [ + ("rx", r"^"), + ("lit", "SET default_table_access_method = regress_table_am;"), + ("rx", 
r"(\n(?!SET[^;]+;)[^\n]*)*"), + ( + "lit", + "CREATE MATERIALIZED VIEW dump_test.regress_pg_dump_matview_am_1 AS", + ), + ("rx", r"\n\s+"), + ("lit", "SELECT count(*) AS count"), + ("rx", r"\n\s+"), + ("lit", "FROM pg_class"), + ("rx", r"\n\s+"), + ("lit", "WITH NO DATA;"), + ("rx", r"\n"), + ], + XM, + ), + "like": {**_full(), **_dts(), "section_pre_data": 1}, + "unlike": { + "exclude_dump_test_schema": 1, + "no_table_access_method": 1, + "only_dump_measurement": 1, + }, + } + tests["statistics_import"] = { + "create_sql": "\n" + "\t\t\tCREATE TABLE dump_test.has_stats\n" + "\t\t\tAS SELECT g.g AS x, g.g / 2 AS y FROM generate_series(1,100) AS g(g);\n" + "\t\t\tCREATE MATERIALIZED VIEW dump_test.has_stats_mv AS SELECT * FROM dump_test.has_stats;\n" + '\t\t\tCREATE INDEX """dump_test""\'s post-data index" ON dump_test.has_stats(x, (x - 1));\n' + "\t\t\tANALYZE dump_test.has_stats, dump_test.has_stats_mv;", + "regexp": _qr( + [ + ("rx", r"^"), + ("lit", "SELECT * FROM pg_catalog.pg_restore_relation_stats("), + ("rx", r"\s+"), + ("rx", r"'version',\s'\d+'::integer,\s+"), + ("rx", r"'schemaname',\s'dump_test',\s+"), + ("rx", r"'relname',\s'\"dump_test\"''s\ post-data\ index',\s+"), + ("rx", r"'relpages',\s'\d+'::integer,\s+"), + ("rx", r"'reltuples',\s'\d+'::real,\s+"), + ("rx", r"'relallvisible',\s'\d+'::integer,\s+"), + ("rx", r"'relallfrozen',\s'\d+'::integer\s+"), + ("rx", r"\);\s+"), + ("lit", "SELECT * FROM pg_catalog.pg_restore_attribute_stats("), + ("rx", r"\s+"), + ("rx", r"'version',\s'\d+'::integer,\s+"), + ("rx", r"'schemaname',\s'dump_test',\s+"), + ("rx", r"'relname',\s'\"dump_test\"''s\ post-data\ index',\s+"), + ("rx", r"'attnum',\s'2'::smallint,\s+"), + ("rx", r"'inherited',\s'f'::boolean,\s+"), + ("rx", r"'null_frac',\s'0'::real,\s+"), + ("rx", r"'avg_width',\s'4'::integer,\s+"), + ("rx", r"'n_distinct',\s'-1'::real,\s+"), + ("rx", r"'histogram_bounds',\s'\{[0-9,]+\}'::text,\s+"), + ("rx", r"'correlation',\s'1'::real\s+"), + ("rx", r"\);"), + ], + 
XM, + ), + "like": { + **_full(), + **_dts(), + "no_data_no_schema": 1, + "no_schema": 1, + "section_post_data": 1, + "statistics_only": 1, + "schema_only_with_statistics": 1, + }, + "unlike": { + "exclude_dump_test_schema": 1, + "no_statistics": 1, + "only_dump_measurement": 1, + "schema_only": 1, + }, + } + tests["extended_statistics_import"] = { + "create_sql": "\n" + "\t\t\tCREATE TABLE dump_test.has_ext_stats\n" + "\t\t\tAS SELECT g.g AS x, g.g / 2 AS y FROM generate_series(1,100) AS g(g);\n" + "\t\t\tCREATE STATISTICS dump_test.es1 ON x, (y % 2) FROM dump_test.has_ext_stats;\n" + "\t\t\tANALYZE dump_test.has_ext_stats;", + "regexp": _qr( + [ + ("rx", r"^"), + ("lit", "SELECT * FROM pg_catalog.pg_restore_extended_stats("), + ("rx", r"\s+"), + ], + XM, + ), + "like": { + **_full(), + **_dts(), + "no_data_no_schema": 1, + "no_schema": 1, + "section_post_data": 1, + "statistics_only": 1, + "schema_only_with_statistics": 1, + }, + "unlike": { + "exclude_dump_test_schema": 1, + "no_statistics": 1, + "only_dump_measurement": 1, + "schema_only": 1, + }, + } + tests["relstats_on_unanalyzed_tables"] = { + "regexp": re.compile(r"pg_catalog.pg_restore_relation_stats"), + "like": { + **_full(), + **_dts(), + "no_data_no_schema": 1, + "no_schema": 1, + "only_dump_test_table": 1, + "role": 1, + "role_parallel": 1, + "section_data": 1, + "section_post_data": 1, + "statistics_only": 1, + "schema_only_with_statistics": 1, + }, + "unlike": { + "no_statistics": 1, + "schema_only": 1, + }, + } + tests["CREATE TABLE regress_pg_dump_table_part"] = { + "create_order": 19, + "create_sql": "\n" + "\t\t\tCREATE TABLE dump_test.regress_pg_dump_table_am_parent (id int) PARTITION BY LIST (id);\n" + "\t\t\tALTER TABLE dump_test.regress_pg_dump_table_am_parent SET ACCESS METHOD regress_table_am;\n" + "\t\t\tCREATE TABLE dump_test.regress_pg_dump_table_am_child_1\n" + "\t\t\t PARTITION OF dump_test.regress_pg_dump_table_am_parent FOR VALUES IN (1);\n" + "\t\t\tCREATE TABLE 
dump_test.regress_pg_dump_table_am_child_2\n" + "\t\t\t PARTITION OF dump_test.regress_pg_dump_table_am_parent FOR VALUES IN (2) USING heap;", + "regexp": _qr( + [ + ("rx", r"^"), + ("rx", r"\n"), + ("lit", "CREATE TABLE dump_test.regress_pg_dump_table_am_parent ("), + ("rx", r"(\n(?!SET[^;]+;)[^\n]*)*"), + ( + "lit", + "ALTER TABLE dump_test.regress_pg_dump_table_am_parent SET ACCESS METHOD regress_table_am;", + ), + ("rx", r"(.*\n)*"), + ("lit", "SET default_table_access_method = regress_table_am;"), + ("rx", r"(\n(?!SET[^;]+;)[^\n]*)*"), + ("rx", r"\n"), + ("lit", "CREATE TABLE dump_test.regress_pg_dump_table_am_child_1 ("), + ("rx", r"(.*\n)*"), + ("lit", "SET default_table_access_method = heap;"), + ("rx", r"(\n(?!SET[^;]+;)[^\n]*)*"), + ("rx", r"\n"), + ("lit", "CREATE TABLE dump_test.regress_pg_dump_table_am_child_2 ("), + ("rx", r"(.*\n)*"), + ], + XM, + ), + "like": {**_full(), **_dts(), "section_pre_data": 1}, + "unlike": { + "exclude_dump_test_schema": 1, + "no_table_access_method": 1, + "only_dump_measurement": 1, + }, + } + + return tests + + +def _create_order_key(item: Tuple[str, dict]) -> Tuple[int, int]: + """Sort key reproducing the Perl create_order comparator. + + Tests with create_order come first, ordered numerically; tests without it + follow in their existing (insertion) order, matching the stable sort the + Perl comparator yields for the no-order pairs. + """ + order = item[1].get("create_order") + if order is None: + return (1, 0) + return (0, order) + + +def _build_create_sql( + tests: Dict[str, dict], collation_support: bool, supports_icu: bool +) -> Dict[str, str]: + """Collect each test's create_sql per database in create_order. + + Mirrors the Perl seeding loop: tests are walked in create_order, an 'icu' + test implies 'collation', collation/icu tests are skipped when unsupported, + and each create_sql is normalized (stripped, given a trailing ';' if + missing, then two newlines) and appended to its target database's buffer. 
+ """ + create_sql: Dict[str, str] = {} + for _name, test in sorted(tests.items(), key=_create_order_key): + test_db = test.get("database", "postgres") + if test.get("icu"): + test["collation"] = 1 + if not test.get("create_sql"): + continue + if not collation_support and test.get("collation"): + continue + if not supports_icu and test.get("icu"): + continue + sql = test["create_sql"] + sql = sql.rstrip("\n") + if not sql.endswith(";"): + sql += ";" + create_sql[test_db] = create_sql.get(test_db, "") + sql + "\n\n" + return create_sql + + +def _check_test_definitions(tests: Dict[str, dict], test_key: str) -> None: + """Reproduce the Perl die() sanity checks for like/unlike completeness.""" + for name, test in tests.items(): + if test.get("all_runs") is None and test.get("like") is None: + raise AssertionError(f'missing "like" in test "{name}"') + unlike = test.get("unlike") or {} + like = test.get("like") or {} + if unlike.get(test_key) and like.get(test_key) is None: + raise AssertionError( + f'useless "unlike" entry "{test_key}" in test "{name}"' + ) + + +def _run_tests_for_output( # pylint: disable=too-many-arguments,too-many-positional-arguments + run: str, + test_key: str, + run_db: str, + output_file: str, + tests: Dict[str, dict], + collation_support: bool, + supports_icu: bool, +) -> None: + """Apply every test's like/unlike rule to one run's dumped SQL.""" + _check_test_definitions(tests, test_key) + for name in sorted(tests): + test = tests[name] + like = test.get("like") or {} + unlike = test.get("unlike") or {} + regexp: Pattern[str] = test["regexp"] + test_db = test.get("database", "postgres") + + if not collation_support and test.get("collation"): + continue + if not supports_icu and test.get("icu"): + continue + if run_db != test_db: + continue + + if (like.get(test_key) or test.get("all_runs")) and unlike.get( + test_key + ) is None: + assert regexp.search( + output_file + ), f"{run}: should dump {name}\nReview {run} results" + else: + assert not 
regexp.search( + output_file + ), f"{run}: should not dump {name}\nReview {run} results" + + +def test_002_pg_dump(create_pg): + """pg_dump/pg_restore matrix against a single seeded server.""" + with tempfile.TemporaryDirectory(prefix="pg_dump_002") as tempdir: + _run_matrix(create_pg, tempdir) + + +def _seed_server(node, tempdir, tests, collation_support, supports_icu): + """Create extra databases and run the ordered create_sql per database.""" + node.safe_psql("create database regress_pg_dump_test;") + node.safe_psql("create database regress_public_owner;") + create_sql = _build_create_sql(tests, collation_support, supports_icu) + for db in sorted(create_sql): + node.safe_psql(create_sql[db], dbname=db) + + +def _negative_tests(node, port, tempdir): + """Run pg_dump/pg_dumpall error-path checks (mirrors the command_fails_like + and command_ok preamble of the Perl driver).""" + node.command_fails_like( + ["pg_dump", "--port", str(port), "qqq"], + r'pg_dump: error: connection to server .* failed: FATAL: database "qqq" does not exist', + "connecting to a non-existent database", + ) + node.command_fails_like( + ["pg_dump", "--dbname", "regression_invalid"], + r'pg_dump: error: connection to server .* failed: FATAL: cannot connect to invalid database "regression_invalid"', + "connecting to an invalid database", + ) + node.command_fails_like( + ["pg_dump", "--port", str(port), "--role", "regress_dump_test_role"], + r"pg_dump: error: query failed: ERROR: permission denied for", + "connecting with an unprivileged user", + ) + node.command_fails_like( + ["pg_dump", "--port", str(port), "--schema", "nonexistent"], + r"pg_dump: error: no matching schemas were found", + "dumping a non-existent schema", + ) + node.command_fails_like( + ["pg_dump", "--port", str(port), "--table", "nonexistent"], + r"pg_dump: error: no matching tables were found", + "dumping a non-existent table", + ) + node.command_fails_like( + ["pg_dump", "--port", str(port), "--strict-names", "--schema", 
"nonexistent*"], + r"pg_dump: error: no matching schemas were found for pattern", + "no matching schemas", + ) + node.command_fails_like( + [ + "pg_dump", + "--port", + str(port), + "--strict-names", + "--schema-only", + "--statistics", + ], + r"pg_dump: error: options --statistics and -s/--schema-only cannot be used together", + "cannot use --statistics and --schema-only together", + ) + node.command_fails_like( + ["pg_dump", "--port", str(port), "--strict-names", "--table", "nonexistent*"], + r"pg_dump: error: no matching tables were found for pattern", + "no matching tables", + ) + node.command_fails_like( + ["pg_dumpall", "--exclude-database", "."], + r"pg_dumpall: error: improper qualified name \(too many dotted names\): \.", + 'pg_dumpall: option --exclude-database rejects multipart pattern "."', + ) + node.command_fails_like( + ["pg_dumpall", "--exclude-database", "myhost.mydb"], + r"pg_dumpall: error: improper qualified name \(too many dotted names\): myhost\.mydb", + "pg_dumpall: option --exclude-database rejects multipart database names", + ) + node.command_ok( + [ + "pg_dump", + "--port", + str(port), + "--schema", + "pg_catalog", + "--file", + f"{tempdir}/pgdump_pgcatalog.dmp", + ], + "pg_dump: option -n pg_catalog", + ) + node.command_ok( + [ + "pg_dumpall", + "--port", + str(port), + "--exclude-database", + '"myhost.mydb"', + "--file", + f"{tempdir}/pgdumpall.dmp", + ], + "pg_dumpall: option --exclude-database handles database names with embedded dots", + ) + node.command_fails_like( + ["pg_dump", "--schema", "myhost.mydb.myschema"], + r"pg_dump: error: improper qualified name \(too many dotted names\): myhost\.mydb\.myschema", + "pg_dump: option --schema rejects three-part schema names", + ) + node.command_fails_like( + ["pg_dump", "--schema", "otherdb.myschema"], + r"pg_dump: error: cross-database references are not implemented: otherdb\.myschema", + "pg_dump: option --schema rejects cross-database multipart schema names", + ) + 
node.command_fails_like( + ["pg_dump", "--schema", "."], + r"pg_dump: error: cross-database references are not implemented: \.", + 'pg_dump: option --schema rejects degenerate two-part schema name: "."', + ) + node.command_fails_like( + ["pg_dump", "--schema", '"some.other.db".myschema'], + r'pg_dump: error: cross-database references are not implemented: "some\.other\.db"\.myschema', + "pg_dump: option --schema rejects cross-database multipart schema names with embedded dots", + ) + node.command_fails_like( + ["pg_dump", "--schema", ".."], + r"pg_dump: error: improper qualified name \(too many dotted names\): \.\.", + 'pg_dump: option --schema rejects degenerate three-part schema name: ".."', + ) + node.command_fails_like( + ["pg_dump", "--table", "myhost.mydb.myschema.mytable"], + r"pg_dump: error: improper relation name \(too many dotted names\): myhost\.mydb\.myschema\.mytable", + "pg_dump: option --table rejects four-part table names", + ) + node.command_fails_like( + ["pg_dump", "--table", "otherdb.pg_catalog.pg_class"], + r"pg_dump: error: cross-database references are not implemented: otherdb\.pg_catalog\.pg_class", + "pg_dump: option --table rejects cross-database three part table names", + ) + node.command_fails_like( + [ + "pg_dump", + "--port", + str(port), + "--table", + '"some.other.db".pg_catalog.pg_class', + ], + r'pg_dump: error: cross-database references are not implemented: "some\.other\.db"\.pg_catalog\.pg_class', + "pg_dump: option --table rejects cross-database three part table names with embedded dots", + ) + + +def _run_matrix(create_pg, tempdir: str) -> None: + """Seed a server, then execute every run x test pair (mirrors the driver).""" + supports_icu = os.environ.get("with_icu") == "yes" + supports_gzip = pypg.check_pg_config(r"#define HAVE_LIBZ 1") + + pgdump_runs = _pgdump_runs(tempdir, supports_gzip) + tests = _tests() + + node = create_pg("main") + port = node.port + + # Determine whether this build supports CREATE COLLATION (libc 
provider). + collation_check = node.psql_capture( + 'CREATE COLLATION testing FROM "C"; DROP COLLATION testing;', + on_error_stop=False, + ) + collation_support = "ERROR: " not in collation_check.stderr + + # ICU doesn't work with some encodings. + encoding = node.safe_psql("show server_encoding") + if encoding == "SQL_ASCII": + supports_icu = False + + _seed_server(node, tempdir, tests, collation_support, supports_icu) + + _negative_tests(node, port, tempdir) + + for run in sorted(pgdump_runs): + spec = pgdump_runs[run] + + node.command_ok(spec["dump_cmd"], f"{run}: pg_dump runs") + + for glob_pattern in spec.get("glob_patterns") or []: + matches = glob.glob(glob_pattern) + ok = len(matches) > 1 or (len(matches) == 1 and os.path.isfile(matches[0])) + assert ok, f"{run}: glob check for {glob_pattern}" + + cmd_like = spec.get("command_like") + if cmd_like: + node.command_like( + cmd_like["command"], + cmd_like["expected"], + f"{run}: {cmd_like['name']}", + ) + + restore_cmd: Optional[list] = spec.get("restore_cmd") + if restore_cmd: + node.command_ok(restore_cmd, f"{run}: pg_restore runs") + + test_key = spec.get("test_key", run) + run_db = spec.get("database", "postgres") + + output_file = pypg.slurp_file(os.path.join(tempdir, f"{run}.sql")) + + _run_tests_for_output( + run, test_key, run_db, output_file, tests, collation_support, supports_icu + ) + + node.stop("fast") diff --git a/src/bin/pg_dump/pyt/test_003_pg_dump_with_server.py b/src/bin/pg_dump/pyt/test_003_pg_dump_with_server.py new file mode 100644 index 0000000000000..b3c6d983f6ca7 --- /dev/null +++ b/src/bin/pg_dump/pyt/test_003_pg_dump_with_server.py @@ -0,0 +1,39 @@ +# Copyright (c) 2021-2026, PostgreSQL Global Development Group + +# pylint: disable=line-too-long +"""Port of src/bin/pg_dump/t/003_pg_dump_with_server.pl. 
+ +pg_dump --include-foreign-data against a live server: dumping a foreign table +whose FDW ('dummy') has no handler fails with a clear error naming the table, +while dumping a foreign server that has no tables (data-only) succeeds. +""" + + +def test_003_pg_dump_with_server(create_pg): + """pg_dump --include-foreign-data error and empty-server success paths.""" + node = create_pg("main") + port = node.port + node.safe_psql("CREATE FOREIGN DATA WRAPPER dummy") + node.safe_psql("CREATE SERVER s0 FOREIGN DATA WRAPPER dummy") + node.safe_psql("CREATE SERVER s1 FOREIGN DATA WRAPPER dummy") + node.safe_psql("CREATE SERVER s2 FOREIGN DATA WRAPPER dummy") + node.safe_psql("CREATE FOREIGN TABLE t0 (a int) SERVER s0") + node.safe_psql("CREATE FOREIGN TABLE t1 (a int) SERVER s1") + pg_bin = node.bin + pg_bin.command_fails_like( + ["pg_dump", "--port", str(port), "--include-foreign-data", "s0", "postgres"], + r'foreign-data wrapper "dummy" has no handler\r?\npg_dump: detail: Query was: .*t0', + "correctly fails to dump a foreign table from a dummy FDW", + ) + pg_bin.command_ok( + [ + "pg_dump", + "--port", + str(port), + "--data-only", + "--include-foreign-data", + "s2", + "postgres", + ], + "dump foreign server with no tables", + ) diff --git a/src/bin/pg_dump/pyt/test_004_pg_dump_parallel.py b/src/bin/pg_dump/pyt/test_004_pg_dump_parallel.py new file mode 100644 index 0000000000000..cb1d618f2f26c --- /dev/null +++ b/src/bin/pg_dump/pyt/test_004_pg_dump_parallel.py @@ -0,0 +1,88 @@ +# Copyright (c) 2021-2026, PostgreSQL Global Development Group + +"""Port of src/bin/pg_dump/t/004_pg_dump_parallel.pl. + +A parallel directory-format pg_dump (--jobs) of a database with hash-partitioned +tables (whose unique constraints make restore ordering non-trivial) restores +cleanly with a parallel pg_restore (--jobs), both in COPY form and with +--inserts. 
def test_004_pg_dump_parallel(create_pg):
    """Dump and restore the source database in parallel, twice.

    The first round uses pg_dump's default data format, the second adds
    --inserts. Each round writes a directory-format archive with two dump
    jobs and restores it into its own database with three restore jobs.
    """
    source_db = "regression_src"
    # (archive name, target database, extra pg_dump options, dump msg, restore msg)
    rounds = (
        ("dump1", "regression_dest1", [], "parallel dump", "parallel restore"),
        (
            "dump2",
            "regression_dest2",
            ["--inserts"],
            "parallel dump as inserts",
            "parallel restore as inserts",
        ),
    )

    node = create_pg("main")
    backupdir = str(node.backup_dir)
    for dbname in (source_db, "regression_dest1", "regression_dest2"):
        node.bin.run_command(["createdb", dbname])
    node.safe_psql(_SETUP, dbname=source_db)

    for archive_name, target_db, dump_options, dump_msg, restore_msg in rounds:
        archive = backupdir + "/" + archive_name

        dump_cmd = [
            "pg_dump",
            "--format",
            "directory",
            "--no-sync",
            "--jobs",
            "2",
            "--file",
            archive,
        ]
        dump_cmd += dump_options
        dump_cmd.append(node.connstr(source_db))
        node.command_ok(dump_cmd, dump_msg)

        restore_cmd = [
            "pg_restore",
            "--verbose",
            "--dbname",
            node.connstr(target_db),
            "--jobs",
            "3",
            archive,
        ]
        node.command_ok(restore_cmd, restore_msg)
the filter tests dump.""" + node.safe_psql("CREATE FOREIGN DATA WRAPPER dummy;") + node.safe_psql("CREATE SERVER dummyserver FOREIGN DATA WRAPPER dummy;") + for tbl in ("table_one", "table_two", "table_three", "table_three_one", "footab"): + node.safe_psql("CREATE TABLE {}(a varchar)".format(tbl)) + node.safe_psql("CREATE TABLE bootab() inherits (footab)") + node.safe_psql('CREATE TABLE "strange aaa\nname"(a varchar)') + node.safe_psql('CREATE TABLE "\nt\nt\n"(a int)') + node.safe_psql("INSERT INTO table_one VALUES('*** TABLE ONE ***')") + node.safe_psql("INSERT INTO table_two VALUES('*** TABLE TWO ***')") + node.safe_psql("INSERT INTO table_three VALUES('*** TABLE THREE ***')") + node.safe_psql("INSERT INTO table_three_one VALUES('*** TABLE THREE_ONE ***')") + node.safe_psql("INSERT INTO bootab VALUES(10)") + node.safe_psql("CREATE DATABASE sourcedb") + node.safe_psql("CREATE DATABASE targetdb") + _setup_sourcedb(node) + + +def _setup_sourcedb(node): + """Create the sourcedb objects exercised by the pg_restore filter tests.""" + node.safe_psql( + "CREATE FUNCTION foo1(a int) RETURNS int AS $$ select $1 $$ LANGUAGE sql", + dbname="sourcedb", + ) + node.safe_psql( + "CREATE FUNCTION foo2(a int) RETURNS int AS $$ select $1 $$ LANGUAGE sql", + dbname="sourcedb", + ) + node.safe_psql( + "CREATE FUNCTION foo3(a double precision, b int) RETURNS double precision " + "AS $$ select $1 + $2 $$ LANGUAGE sql", + dbname="sourcedb", + ) + node.safe_psql( + "CREATE FUNCTION foo_trg() RETURNS trigger AS $$ BEGIN RETURN NEW; END $$ " + "LANGUAGE plpgsql", + dbname="sourcedb", + ) + node.safe_psql( + "CREATE SCHEMA s1;\nCREATE SCHEMA s2;\n" + "CREATE TABLE s1.t1(a int);\nCREATE SEQUENCE s1.s1;\n" + "CREATE TABLE s2.t2(a int);\n" + "CREATE TABLE t1(a int, b int);\nCREATE TABLE t2(a int, b int);\n" + "CREATE INDEX t1_idx1 ON t1(a);\nCREATE INDEX t1_idx2 ON t1(b);\n" + "CREATE TRIGGER trg1 BEFORE INSERT ON t1 EXECUTE FUNCTION foo_trg();\n" + "CREATE TRIGGER trg2 BEFORE INSERT ON t1 
class _Filter:
    """Bundle a node, the filter-file paths, the dump output path and helpers.

    ``inputfile`` and ``inputfile2`` are filter files under ``tempdir``;
    ``plainfile`` is where pg_dump is told to write its output. ``dump_ok``
    and ``dump_fails`` build the same pg_dump command line and differ only
    in the expected outcome.
    """

    def __init__(self, node, tempdir, plainfile):
        self.node = node
        self.port = node.port
        self.inputfile = tempdir + "/inputfile.txt"
        self.inputfile2 = tempdir + "/inputfile2.txt"
        self.plainfile = plainfile

    def _dump_cmd(self, db, extra=None):
        """Return the pg_dump argv: fixed options, then ``extra``, then ``db``."""
        cmd = [
            "pg_dump",
            "--port",
            str(self.port),
            "--file",
            self.plainfile,
            "--filter",
            self.inputfile,
        ]
        cmd += extra or []
        cmd.append(db)
        return cmd

    def write(self, content, path=None):
        """Write filter content to ``path``, or to the default filter file."""
        _write_filter(path or self.inputfile, content)

    def dump_ok(self, msg, db="postgres", extra=None):
        """Run pg_dump with the current filter file; assert success.

        ``extra`` is an optional list of additional options inserted before
        the database name.
        """
        self.node.command_ok(self._dump_cmd(db, extra), msg)

    def dump_fails(self, pattern, msg, db="postgres", extra=None):
        """Run pg_dump with the current filter file; assert failure + stderr.

        ``pattern`` is matched against the command's error output by the
        node's ``command_fails_like``. ``extra`` mirrors ``dump_ok`` so that
        failure cases needing additional options do not have to rebuild the
        command by hand.
        """
        self.node.command_fails_like(self._dump_cmd(db, extra), pattern, msg)

    def slurp(self):
        """Return the dump output written to the plain file."""
        return pypg.slurp_file(self.plainfile)
+ tbl, "{} dumped".format(tbl)) + + flt.write( + " include table table_one #comment\n" + "include table table_two\n" + "# skip this line\n" + "\n" + "\t\n" + " \t# another comment\n" + "exclude table_data table_one\n" + ) + flt.dump_ok("dump tables with filter patterns as well as comments") + dump = flt.slurp() + _like(dump, r"^CREATE TABLE public\.table_one", "dumped table one") + _like(dump, r"^CREATE TABLE public\.table_two", "dumped table two") + _unlike(dump, r"^CREATE TABLE public\.table_three", "table three not dumped") + _unlike( + dump, r"^CREATE TABLE public\.table_three_one", "table three_one not dumped" + ) + _unlike(dump, r"^COPY public\.table_one", "content of table one is not included") + _like(dump, r"^COPY public\.table_two", "content of table two is included") + + flt.write( + "include table public.table_one\n" + 'include table "public"."table_two"\n' + 'include table "public". table_three\n' + ) + flt.dump_ok("filter file with qualified names") + dump = flt.slurp() + _like(dump, r"^CREATE TABLE public\.table_one", "dumped table one") + _like(dump, r"^CREATE TABLE public\.table_two", "dumped table two") + _like(dump, r"^CREATE TABLE public\.table_three", "dumped table three") + + flt.write("exclude table table_one\n") + flt.dump_ok("dump tables with exclusion of a single table") + dump = flt.slurp() + _unlike(dump, r"^CREATE TABLE public\.table_one", "table one not dumped") + _like(dump, r"^CREATE TABLE public\.table_two", "dumped table two") + _like(dump, r"^CREATE TABLE public\.table_three", "dumped table three") + _like(dump, r"^CREATE TABLE public\.table_three_one", "dumped table three_one") + + flt.write("include table table_thre*\n") + flt.dump_ok("dump tables with wildcard in pattern") + dump = flt.slurp() + _unlike(dump, r"^CREATE TABLE public\.table_one", "table one not dumped") + _unlike(dump, r"^CREATE TABLE public\.table_two", "table two not dumped") + _like(dump, r"^CREATE TABLE public\.table_three", "dumped table three") + 
def _test_multiline_names(flt):
    """Check quoted table names containing newlines and public-schema exclusion.

    Covers a raw newline and an escaped ``\\n`` inside a quoted identifier,
    excluding the public schema, and combining two filter files on one
    pg_dump command line.
    """
    strange_table = r"^CREATE TABLE public.\"strange aaa"
    newline_table = r"^CREATE TABLE public.\"\nt\nt\n\" \($"
    any_table = r"^CREATE TABLE"
    multiline_flags = re.MULTILINE | re.DOTALL

    # Include by a name written with a real newline in the filter file.
    flt.write('include table "strange aaa\nname"')
    flt.dump_ok("dump tables with multiline names requiring quoting")
    dump = flt.slurp()
    _like(dump, strange_table, "dump table with new line in name")

    # Exclude the same table, this time spelling the newline as backslash-n.
    flt.write('exclude table "strange aaa\\nname"')
    flt.dump_ok("dump tables with filter")
    dump = flt.slurp()
    _unlike(dump, strange_table, "exclude table with new line in name")

    flt.write("exclude schema public\n")
    flt.dump_ok("exclude the public schema")
    dump = flt.slurp()
    _unlike(dump, any_table, "no table dumped")

    # Two filter files: the first includes public, the second excludes it.
    flt.write("include schema public\n")
    flt.write("exclude schema public\n", path=flt.inputfile2)
    flt.dump_ok(
        "exclude the public schema with multiple filters",
        extra=["--filter", flt.inputfile2],
    )
    dump = flt.slurp()
    _unlike(dump, any_table, "no table dumped")

    # A name that starts and ends with a newline, given raw and then escaped.
    flt.write('include table "\nt\nt\n"')
    flt.dump_ok("dump tables with multiline leading-newline name")
    dump = flt.slurp()
    _like(
        dump,
        newline_table,
        "dump table with multiline strange name",
        flags=multiline_flags,
    )

    flt.write('include table "\\nt\\nt\\n"')
    flt.dump_ok("dump tables with escaped multiline name")
    dump = flt.slurp()
    _like(
        dump,
        newline_table,
        "dump table with multiline strange name",
        flags=multiline_flags,
    )
def _test_strict_names(flt):
    """Check --strict-names with a matching and then a non-matching pattern."""
    # Every pattern in the filter file matches: the dump must succeed.
    flt.write("include table table_one\n")
    flt.dump_ok("strict names with matching pattern", extra=["--strict-names"])
    dump = flt.slurp()
    _like(dump, r"^CREATE TABLE public\.table_one", "table one dumped")

    # Append a second pattern to the existing filter file (append mode, so
    # the first line is kept); this one is expected to match nothing.
    with open(flt.inputfile, "a", encoding="utf-8") as filter_file:
        filter_file.write("include table table_nonexisting_name")

    strict_cmd = ["pg_dump", "--port", str(flt.port)]
    strict_cmd += ["--file", flt.plainfile]
    strict_cmd += ["--filter", flt.inputfile]
    strict_cmd += ["--strict-names", "postgres"]
    flt.node.command_fails_like(
        strict_cmd,
        r"no matching tables were found",
        "inclusion of non-existing objects with --strict names",
    )
def _restore_ok(flt, dumpfile, msg, fmt="custom"):
    """Run pg_restore with the current filter file; assert success.

    ``flt`` supplies the port, the output file and the filter file, and its
    node runs the command. ``dumpfile`` is the archive to read. ``fmt`` is
    passed as ``--format``; a falsy value omits that option.

    The archive path is always appended: it is a positional argument that
    does not depend on whether ``--format`` is given.
    """
    cmd = [
        "pg_restore",
        "--port",
        str(flt.port),
        "--file",
        flt.plainfile,
        "--filter",
        flt.inputfile,
    ]
    if fmt:
        cmd += ["--format", fmt]
    cmd.append(dumpfile)
    flt.node.command_ok(cmd, msg)
def _test_pg_restore_objects(flt, tempdir):
    """Check pg_restore --filter for functions, indexes, triggers and schemas.

    Dumps ``sourcedb`` in custom format to ``tempdir``/filter_test.dump, then
    restores that archive to SQL once per filter file and asserts on the
    statements that do and do not appear in the output.

    Each negative assertion names an object that the filter is expected to
    leave out (function foo2, index t1_idx2, trigger trg2, table s2.t2), so
    the check can actually fail if the filter is ignored.
    """
    node = flt.node
    dumpfile = tempdir + "/filter_test.dump"
    node.command_ok(
        [
            "pg_dump",
            "--port",
            str(flt.port),
            "--file",
            dumpfile,
            "--format",
            "custom",
            "sourcedb",
        ],
        "dump all objects from sourcedb",
    )

    flt.write("include function foo1(integer)")
    _restore_ok(flt, dumpfile, "restore function with filter")
    dump = flt.slurp()
    _like(dump, r"^CREATE FUNCTION public\.foo1", "wanted function restored")
    # foo2 is a function, so its statement would start with CREATE FUNCTION.
    _unlike(dump, r"^CREATE FUNCTION public\.foo2", "unwanted function not restored")

    # Extra whitespace inside the signature must be tolerated by the parser.
    flt.write("include function foo3 ( double precision , integer) ")
    _restore_ok(flt, dumpfile, "restore function with whitespace-tolerant filter")
    _like(flt.slurp(), r"^CREATE FUNCTION public\.foo3", "wanted function restored")

    flt.write("include index t1_idx1\ninclude trigger t1 trg1\n")
    _restore_ok(flt, dumpfile, "restore index/trigger with filter")
    dump = flt.slurp()
    _like(dump, r"^CREATE INDEX t1_idx1", "wanted index restored")
    # The index that was not requested is t1_idx2 (both indexes are on t1).
    _unlike(dump, r"^CREATE INDEX t1_idx2", "unwanted index not restored")
    _like(dump, r"^CREATE TRIGGER trg1", "wanted trigger restored")
    _unlike(dump, r"^CREATE TRIGGER trg2", "unwanted trigger not restored")

    flt.write("include schema s1\n")
    _restore_ok(flt, dumpfile, "restore schema with filter")
    dump = flt.slurp()
    _like(dump, r"^CREATE TABLE s1\.t1", "wanted table from schema restored")
    _like(dump, r"^CREATE SEQUENCE s1\.s1", "wanted sequence from schema restored")
    # The dot must be escaped: "\t" in a regex is a tab, not ".t".
    _unlike(dump, r"^CREATE TABLE s2\.t2", "unwanted table not restored")

    flt.write("exclude schema s1\n")
    _restore_ok(flt, dumpfile, "restore with schema exclusion filter")
    dump = flt.slurp()
    _unlike(dump, r"^CREATE TABLE s1\.t1", "unwanted table from schema not restored")
    _unlike(dump, r"^CREATE SEQUENCE s1\.s1", "unwanted sequence not restored")
    _like(dump, r"^CREATE TABLE s2\.t2", "wanted table restored")
    _like(dump, r"^CREATE TABLE public\.t1", "wanted table restored")
def _supports(option):
    """Report whether this build defines the macro for a compression option.

    ``option`` is one of "gzip", "lz4" or "zstd"; any other key raises
    KeyError. The answer is whatever ``pypg.check_pg_config`` returns for
    the corresponding ``#define`` line.
    """
    macro_by_option = dict(
        gzip=r"#define HAVE_LIBZ 1",
        lz4=r"#define USE_LZ4 1",
        zstd=r"#define USE_ZSTD 1",
    )
    checker = pypg.check_pg_config
    define_line = macro_by_option[option]
    return checker(define_line)
+_TESTS: Dict[str, Dict[str, Any]] = { + "matview_compression_lz4": { + "create_order": 20, + "create_sql": ( + "CREATE MATERIALIZED VIEW\n" + " matview_compression_lz4 (col2) AS\n" + " SELECT repeat('xyzzy', 10000);\n" + "ALTER MATERIALIZED VIEW matview_compression_lz4\n" + "ALTER COLUMN col2 SET COMPRESSION lz4;" + ), + "regexp": re.compile( + r"^" + + re.escape("CREATE MATERIALIZED VIEW public.matview_compression_lz4 AS") + + r"\n\s+" + + re.escape("SELECT repeat('xyzzy'::text, 10000) AS col2") + + r"\n\s+" + + re.escape("WITH NO DATA;") + + r".*" + + re.escape( + "ALTER TABLE ONLY public.matview_compression_lz4 ALTER COLUMN " + "col2 SET COMPRESSION lz4;" + ) + + r"\n", + re.DOTALL | re.MULTILINE, + ), + "compile_option": "lz4", + "like": True, + }, + "test_compression_method_create": { + "create_order": 110, + "create_sql": "CREATE TABLE test_compression_method (\n col1 text\n);", + "regexp": re.compile( + r"^" + + re.escape("CREATE TABLE public.test_compression_method (") + + r"\n\s+" + + re.escape("col1 text") + + r"\n" + + re.escape(");"), + re.MULTILINE, + ), + "like": True, + }, + "test_compression_method_copy": { + "create_order": 111, + "create_sql": ( + "INSERT INTO test_compression_method (col1) " + "SELECT string_agg(a::text, '') FROM generate_series(1,65536) a;" + ), + "regexp": re.compile( + r"^" + + re.escape("COPY public.test_compression_method (col1) FROM stdin;") + + r"\n(?:(?:\d\d\d\d\d\d\d\d\d\d){31657}\d\d\d\d\n){1}\\\.\n", + re.MULTILINE, + ), + "like": True, + }, + "test_compression_create": { + "create_order": 3, + "create_sql": ( + "CREATE TABLE test_compression (\n" + " col1 int,\n" + " col2 text COMPRESSION lz4\n" + ");" + ), + "regexp": re.compile( + r"^" + + re.escape("CREATE TABLE public.test_compression (") + + r"\n\s+" + + re.escape("col1 integer,") + + r"\n\s+" + + re.escape("col2 text") + + r"\n" + + re.escape(");") + + r"\n.*" + + re.escape( + "ALTER TABLE ONLY public.test_compression ALTER COLUMN " + "col2 SET COMPRESSION 
def _setup_objects(node):
    """Create the objects to be dumped, in ascending ``create_order``.

    Only ``_TESTS`` entries with a ``create_sql`` take part. Entries whose
    ``compile_option`` is not supported by this build are left out. Each
    statement is given a trailing semicolon if it lacks one, and the whole
    script is sent to the node in a single ``safe_psql`` call.
    """
    # sorted() is stable, so entries with equal (or missing) create_order
    # keep their _TESTS definition order.
    ordered_specs = sorted(
        (spec for spec in _TESTS.values() if spec.get("create_sql")),
        key=lambda spec: spec.get("create_order", 1 << 30),
    )

    statements = []
    for spec in ordered_specs:
        required = spec.get("compile_option")
        if required and not _supports(required):
            continue
        text = str(spec["create_sql"]).rstrip("\n")
        statements.append(text if text.endswith(";") else text + ";")

    # Each statement is followed by a blank line, including the last one.
    node.safe_psql("".join(statement + "\n\n" for statement in statements))
_method_runs(tempdir, method, ext, prog_env): + """The custom/dir/plain runs for one compression method.""" + base = tempdir + "/compression_" + method + custom_compress = "1" if method == "gzip" else method + dir_compress = {"gzip": "gzip:1", "lz4": "lz4:1", "zstd": "zstd:1"}[method] + plain_compress = {"gzip": "1", "lz4": "lz4", "zstd": "zstd:long"}[method] + return { + "compression_{}_custom".format(method): { + "compile_option": method, + "dump_cmd": [ + "pg_dump", + "--no-sync", + "--format", + "custom", + "--compress", + custom_compress, + "--file", + base + "_custom.dump", + "--statistics", + "postgres", + ], + "restore_cmd": [ + "pg_restore", + "--file", + base + "_custom.sql", + "--statistics", + base + "_custom.dump", + ], + "command_like": { + "command": ["pg_restore", "--list", base + "_custom.dump"], + "expected": r"Compression: {}".format(method), + "name": "data content is {} compressed".format(method), + }, + }, + "compression_{}_dir".format(method): { + "compile_option": method, + "dump_cmd": [ + "pg_dump", + "--no-sync", + "--jobs", + "2", + "--format", + "directory", + "--compress", + dir_compress, + "--file", + base + "_dir", + "--statistics", + "postgres", + ], + "compress_cmd": { + "program_env": prog_env, + "args": _dir_compress_args(method, base + "_dir"), + }, + "glob_patterns": [ + base + "_dir/toc.dat." + ext, + base + "_dir/*.dat." + ext, + ], + "restore_cmd": [ + "pg_restore", + "--jobs", + "2", + "--file", + base + "_dir.sql", + "--statistics", + base + "_dir", + ], + }, + "compression_{}_plain".format(method): { + "compile_option": method, + "dump_cmd": [ + "pg_dump", + "--no-sync", + "--format", + "plain", + "--compress", + plain_compress, + "--file", + base + "_plain.sql." + ext, + "--statistics", + "postgres", + ], + "compress_cmd": { + "program_env": prog_env, + "args": _plain_decompress_args(method, base + "_plain.sql." 
def _run_compress_cmd(node, run, spec):
    """Run a run's manual (de)compression step, if it has one.

    Returns True when the run has no ``compress_cmd`` or when the command
    exits with status 0. Returns False, meaning "skip this run", when the
    environment variable named by ``program_env`` is unset or empty. A
    non-zero exit status fails an assertion that includes the command's
    stderr. ``node`` is accepted but not used.
    """
    settings = spec.get("compress_cmd")
    if settings:
        executable = os.environ.get(settings["program_env"], "")
        if not executable:
            return False

        # Expand each argument as a glob; an argument that matches nothing
        # is passed through unchanged.
        argv = [executable]
        for raw_arg in settings["args"]:
            argv.extend(globmod.glob(raw_arg) or [raw_arg])

        completed = subprocess.run(argv, capture_output=True, check=False)
        assert completed.returncode == 0, "{}: compression commands\n{}".format(
            run, completed.stderr.decode("utf-8", "replace")
        )
    return True
def test_006_pg_dump_compress(create_pg, tmp_path):
    """Run every compression dump and compare its SQL with the expected regexes.

    Runs are processed in sorted name order. A run is skipped when its
    ``compile_option`` is not supported by this build, or, after the dump
    has been taken, when ``_run_compress_cmd`` reports that its external
    compression program is unavailable.
    """
    workdir = str(tmp_path)
    node = create_pg("main")
    _setup_objects(node)

    matrix = _dump_runs(workdir)
    for run_name in sorted(matrix):
        run_spec = matrix[run_name]

        build_option = run_spec.get("compile_option")
        buildable = not build_option or _supports(build_option)
        if not buildable:
            continue

        node.command_ok(run_spec["dump_cmd"], "{}: pg_dump runs".format(run_name))

        # The manual (de)compression step decides whether the run continues.
        proceed = _run_compress_cmd(node, run_name, run_spec)
        if not proceed:
            continue

        _check_glob_patterns(run_name, run_spec)

        listing_check = run_spec.get("command_like")
        if listing_check:
            node.command_like(
                listing_check["command"],
                listing_check["expected"],
                "{}: {}".format(run_name, listing_check["name"]),
            )

        if run_spec.get("restore_cmd"):
            node.command_ok(
                run_spec["restore_cmd"], "{}: pg_restore runs".format(run_name)
            )

        _check_dump_output(node, run_name, run_spec, workdir)

    node.stop("fast")
def _q(*literals):
    """Regex-escape every literal and concatenate the results in order.

    This plays the role of the \\Q...\\E quoted segments of a Perl /x regex.
    With no arguments the result is the empty string.
    """
    escaped_parts = map(re.escape, literals)
    return "".join(escaped_parts)
'name6');" + ), + "dump_globals_only": ( + "CREATE TABLE format_dir(a int, b boolean, c text);\n" + "INSERT INTO format_dir VALUES (1, false, 'name5'), (2, true, 'name6');" + ), + "restore_no_globals": ( + "CREATE TABLE no_globals_test(a int, b text);\n" + "INSERT INTO no_globals_test VALUES (1, 'hello'), (2, 'world');" + ), + } + + +_EXCLUDING_DBS_SQL = """\ +CREATE DATABASE db1; +\\c db1 +CREATE TABLE t1 (id int); +INSERT INTO t1 VALUES (1), (2), (3), (4); +CREATE TABLE t2 (id int); +INSERT INTO t2 VALUES (1), (2), (3), (4); + +CREATE DATABASE db2; +\\c db2 +CREATE TABLE t3 (id int); +INSERT INTO t3 VALUES (1), (2), (3), (4); +CREATE TABLE t4 (id int); +INSERT INTO t4 VALUES (1), (2), (3), (4); + +CREATE DATABASE dbex3; +\\c dbex3 +CREATE TABLE t5 (id int); +INSERT INTO t5 VALUES (1), (2), (3), (4); +CREATE TABLE t6 (id int); +INSERT INTO t6 VALUES (1), (2), (3), (4); + +CREATE DATABASE dbex4; +\\c dbex4 +CREATE TABLE t7 (id int); +INSERT INTO t7 VALUES (1), (2), (3), (4); +CREATE TABLE t8 (id int); +INSERT INTO t8 VALUES (1), (2), (3), (4); + +CREATE DATABASE db5; +\\c db5 +CREATE TABLE t9 (id int); +INSERT INTO t9 VALUES (1), (2), (3), (4); +CREATE TABLE t10 (id int); +INSERT INTO t10 VALUES (1), (2), (3), (4); +""" + +_ANY = r"(.*\n)*" + + +def _build_runs(tempdir, tablespace1, tablespace2): + """Return the ordered (name, dump_cmd, restore_cmd, like, unlike) runs.""" + runs: Dict[ + str, + Tuple[ + List[str], + List[str], + Optional["re.Pattern[str]"], + Optional["re.Pattern[str]"], + ], + ] = {} + runs["restore_roles"] = ( + ["pg_dumpall", "--format", "directory", "--file", tempdir + "/restore_roles"], + [ + "pg_restore", + "-C", + "--format", + "directory", + "--file", + tempdir + "/restore_roles.sql", + tempdir + "/restore_roles", + ], + re.compile( + r"\s*" + + _q("CREATE ROLE dumpall2;") + + r"\s*" + + _q( + "ALTER ROLE dumpall2 WITH NOSUPERUSER INHERIT NOCREATEROLE " + "NOCREATEDB NOLOGIN REPLICATION NOBYPASSRLS CONNECTION LIMIT 10;" + ), + re.MULTILINE, 
+ ), + None, + ) + runs["restore_tablespace"] = ( + [ + "pg_dumpall", + "--format", + "directory", + "--file", + tempdir + "/restore_tablespace", + ], + [ + "pg_restore", + "-C", + "--format", + "directory", + "--file", + tempdir + "/restore_tablespace.sql", + tempdir + "/restore_tablespace", + ], + re.compile( + r"^\n" + + _q("CREATE TABLESPACE tbl2 OWNER tap LOCATION ") + + r"(?:E)?" + + _q("'{}';".format(tablespace2)) + + r"\n" + + _q("ALTER TABLESPACE tbl2 SET (seq_page_cost=1.0);"), + re.MULTILINE, + ), + None, + ) + runs["restore_grants"] = ( + ["pg_dumpall", "--format", "directory", "--file", tempdir + "/restore_grants"], + [ + "pg_restore", + "-C", + "--format", + "directory", + "--file", + tempdir + "/restore_grants.sql", + tempdir + "/restore_grants", + ], + re.compile(_GRANTS_LIKE, re.MULTILINE), + None, + ) + runs["excluding_databases"] = _excluding_databases_run(tempdir) + for fmt, name in ( + ("directory", "format_directory"), + ("tar", "format_tar"), + ("custom", "format_custom"), + ): + runs[name] = _format_run(tempdir, fmt, name) + runs["dump_globals_only"] = ( + [ + "pg_dumpall", + "--format", + "directory", + "--globals-only", + "--file", + tempdir + "/dump_globals_only", + ], + [ + "pg_restore", + "-C", + "--globals-only", + "--format", + "directory", + "--file", + tempdir + "/dump_globals_only.sql", + tempdir + "/dump_globals_only", + ], + re.compile(r"^\s*" + _q("CREATE ROLE dumpall;") + r"\s*\n", re.MULTILINE), + None, + ) + runs["restore_no_globals"] = ( + [ + "pg_dumpall", + "--format", + "directory", + "--file", + tempdir + "/restore_no_globals", + ], + [ + "pg_restore", + "-C", + "--no-globals", + "--format", + "directory", + "--file", + tempdir + "/restore_no_globals.sql", + tempdir + "/restore_no_globals", + ], + re.compile( + r"^\n" + _q("COPY public.no_globals_test (a, b) FROM stdin;"), + re.MULTILINE, + ), + re.compile(r"^" + _q("CREATE ROLE dumpall;"), re.MULTILINE), + ) + return runs + + +_GRANTS_LIKE = ( + r"^\n" + + _q("GRANT ALL 
def _excluding_databases_run(tempdir):
    """Build the excluding_databases run tuple.

    Returns (dump_cmd, restore_cmd, like, unlike):

    * dump_cmd runs pg_dumpall in directory format with
      --exclude-database 'dbex*';
    * restore_cmd runs pg_restore -C on that archive with
      --exclude-database db5, writing <tempdir>/excluding_databases.sql;
    * like must match the restored script: db1 with t1/t2 followed by db2
      with t3/t4, in that order;
    * unlike must not match it: any CREATE statement for an excluded database
      (dbex3, dbex4, db5) or for one of their tables (t5..t10).
    """
    archive = tempdir + "/excluding_databases"

    # Statements that must appear, in this order, each at the start of a line
    # that follows a newline; anything may sit in between.
    expected = (
        "CREATE DATABASE db1",
        "CREATE TABLE public.t1 (",
        "CREATE TABLE public.t2 (",
        "CREATE DATABASE db2",
        "CREATE TABLE public.t3 (",
        "CREATE TABLE public.t4 (",
    )
    like = re.compile(
        r"^\n" + (_ANY + r"\n").join(_q(stmt) for stmt in expected),
        re.MULTILINE,
    )

    # The setup SQL names the excluded databases dbex3/dbex4 (not db3/db4), so
    # look for those.  Each statement is an independent alternative: a single
    # leaked database or table is enough to fail the run, instead of requiring
    # every excluded object to show up in one fixed sequence.
    excluded_dbs = ("dbex3", "dbex4", "db5")
    excluded_tables = ("t5", "t6", "t7", "t8", "t9", "t10")
    forbidden = [_q("CREATE DATABASE " + db) + r"\b" for db in excluded_dbs]
    forbidden += [
        _q("CREATE TABLE public.{} (".format(tbl)) for tbl in excluded_tables
    ]
    unlike = re.compile(r"^(?:" + "|".join(forbidden) + r")", re.MULTILINE)

    dump_cmd = [
        "pg_dumpall",
        "--format",
        "directory",
        "--file",
        archive,
        "--exclude-database",
        "dbex*",
    ]
    restore_cmd = [
        "pg_restore",
        "-C",
        "--format",
        "directory",
        "--file",
        archive + ".sql",
        "--exclude-database",
        "db5",
        archive,
    ]
    return (dump_cmd, restore_cmd, like, unlike)
def _run_dump_restore_cases(node, create_pg, tempdir, runs):
    """Execute every dump/restore run and assert its like/unlike patterns.

    node is the source cluster, create_pg creates a fresh target cluster per
    run, tempdir is where each run's restore command writes "<run>.sql", and
    runs maps a run name to (dump_cmd, restore_cmd, like, unlike).

    All setup SQL is applied to the source first (in sorted run order); then,
    again in sorted order, each run dumps the source, restores against its own
    target with --host/--port appended, and checks the resulting SQL file.

    Raises AssertionError if a run has neither pattern, if "like" does not
    match, or if "unlike" does match.
    """
    ordered_runs = sorted(runs)

    # _setup_sql() builds a new dict on every call; fetch it once instead of
    # once per run.
    setup_by_run = _setup_sql()
    for run in ordered_runs:
        setup = setup_by_run.get(run)
        if setup:
            node.safe_psql(setup, dbname=_RUN_DB)

    for run in ordered_runs:
        dump_cmd, restore_cmd, like, unlike = runs[run]
        # A run with nothing to check is an error in the run table; report it
        # before spending time on a new cluster and a dump.
        assert like or unlike, 'missing "like" or "unlike" in test "{}"'.format(run)

        target = create_pg("target_" + run)
        try:
            node.command_ok(dump_cmd, "{}: pg_dumpall runs".format(run))
            node.bin.run_command(
                restore_cmd + ["--host", str(target.host), "--port", str(target.port)]
            )
            output = pypg.slurp_file("{}/{}.sql".format(tempdir, run))
            if like:
                assert like.search(output), "should dump {}".format(run)
            if unlike:
                assert not unlike.search(output), "should not dump {}".format(run)
        finally:
            # Shut the per-run target down even when a check above fails, so a
            # failing run does not leave its cluster behind for later runs.
            target.stop()
            target.clean_node()
"--clean", "--globals-only"], + _ERR_PREFIX + "options --clean and -g/--globals-only cannot be used together " + "when restoring an archive created by pg_dumpall", + "When --clean and -g/--globals-only are used in pg_restore", + ), + ( + ["-C", "--no-schema"], + _ERR_PREFIX + "option --no-schema cannot be used when restoring an archive " + "created by pg_dumpall", + "When --no-schema is used in pg_restore with dump of pg_dumpall", + ), + ( + ["-C", "--data-only"], + _ERR_PREFIX + "option -a/--data-only cannot be used when restoring an archive " + "created by pg_dumpall", + "When --data-only is used in pg_restore with dump of pg_dumpall", + ), + ( + ["-C", "--statistics-only"], + _ERR_PREFIX + + "option --statistics-only cannot be used when restoring an archive " + "created by pg_dumpall", + "When --statistics-only is used in pg_restore with dump of pg_dumpall", + ), + ( + ["-C", "--section", "post-data"], + _ERR_PREFIX + "option --section cannot exclude --pre-data when restoring a " + "pg_dumpall archive", + "When --section=post-data is used in pg_restore", + ), + ( + ["-C", "--globals-only", "--data-only"], + _ERR_PREFIX + + "options -a/--data-only and -g/--globals-only cannot be used together", + "When --globals-only and --data-only are used together", + ), + ( + ["-C", "--globals-only", "--schema-only"], + _ERR_PREFIX + + "options -g/--globals-only and -s/--schema-only cannot be used together", + "When --globals-only and --schema-only are used together", + ), + ( + ["-C", "--globals-only", "--statistics-only"], + _ERR_PREFIX + + "options -g/--globals-only and --statistics-only cannot be used together", + "When --globals-only and --statistics-only are used together", + ), + ( + ["-C", "--globals-only", "--statistics"], + _ERR_PREFIX + + "options --statistics and -g/--globals-only cannot be used together", + "When --globals-only and --statistics are used together", + ), + ( + ["-C", "--globals-only", "--exit-on-error"], + _ERR_PREFIX + + "options --exit-on-error 
def _check_pg_dumpall_errors(node, tempdir):
    """Check the pg_restore option errors specific to pg_dumpall archives.

    Each entry of _PG_DUMPALL_ERRORS is run against the custom-format archive
    at <tempdir>/format_custom and must fail with the listed message, which is
    matched literally.  Entries that do not pass -d are given a --file target
    instead.  A final case restores into a database that does not exist.
    """
    archive = tempdir + "/format_custom"
    file_target = ["--file", tempdir + "/error_test.sql"]

    for options, expected_error, description in _PG_DUMPALL_ERRORS:
        argv = ["pg_restore", archive, "--format", "custom"]
        argv.extend(options)
        if "-d" not in options:
            argv.extend(file_target)
        node.command_fails_like(argv, re.escape(expected_error), description)

    missing_db_argv = ["pg_restore", archive, "-C", "--format", "custom", "-d", "dbpq"]
    node.command_fails_like(
        missing_db_argv,
        re.escape('FATAL: database "dbpq" does not exist'),
        "When non-existent database is given with -d option",
    )
r"# \1", map_content, flags=re.MULTILINE + ) + with open(map_path, "w", encoding="utf-8") as fh: + fh.write(map_content) + + target_comment = create_pg("target_comment") + node.command_ok( + [ + "pg_restore", + "-C", + "--format", + "directory", + "--file", + tempdir + "/comment_test_restore.sql", + "--host", + str(target_comment.host), + "--port", + str(target_comment.port), + tempdir + "/comment_test", + ], + "pg_restore with commented out database in map.dat", + ) + restore_output = pypg.slurp_file(tempdir + "/comment_test_restore.sql") + assert not re.search( + r"CREATE DATABASE comment_test_db", restore_output + ), "commented out database in map.dat is not restored" + + node.command_ok( + [ + "pg_restore", + "-C", + "--format", + "custom", + "--clean", + "--file", + tempdir + "/clean_test.sql", + tempdir + "/format_custom", + ], + "pg_restore with --clean on pg_dumpall archive", + ) + clean_output = pypg.slurp_file(tempdir + "/clean_test.sql") + assert re.search( + r"DROP ROLE IF EXISTS", clean_output + ), "--clean implies --if-exists: DROP ROLE IF EXISTS in output" + + +def test_007_pg_dumpall(create_pg, tmp_path): + """pg_dumpall format round-trips, exclusions, and pg_restore-only errors.""" + tempdir = str(tmp_path) + tablespace1 = tempdir + "/tbl1" + tablespace2 = tempdir + "/tbl2" + os.mkdir(tablespace1) + os.mkdir(tablespace2) + + node = create_pg("node") + + # restore_tablespace setup needs the (escaped) tablespace locations. 
def _ascii_string(from_char, to_char):
    """Return a bytes object holding every byte value from_char..to_char.

    Both ends are inclusive; an empty bytes object results when to_char is
    below from_char.
    """
    code_points = range(from_char, to_char + 1)
    return bytes(code_points)
def _connstr_bytes(node, dbname):
    """Build a libpq connection string, as bytes, for the database dbname.

    dbname is a bytes object.  The result is
    host=<node.host> port=<node.port> dbname='<escaped name>', with
    backslashes and single quotes in the name backslash-escaped, mirroring
    PostgreSQL::Test::Cluster->connstr.
    """
    # Escape backslashes first so the ones added for quotes are not doubled.
    quoted = dbname.replace(b"\\", b"\\\\")
    quoted = quoted.replace(b"'", b"\\'")
    head = "host={} port={} dbname=".format(node.host, node.port).encode("latin-1")
    return b"".join((head, b"'", quoted, b"'"))
def _dumpall_roles_only(node, dbname, username, no_sync, msg, discard):
    """Run pg_dumpall --roles-only for the given database and user.

    The database is addressed through a connection string built from dbname,
    the dump is written to the file discard, and --no-sync is added when
    no_sync is true.  The command must succeed; msg labels the check.
    """
    argv = ["pg_dumpall"]
    if no_sync:
        argv.append("--no-sync")
    argv.append("--roles-only")
    argv.extend(
        [
            "--file",
            discard,
            "--dbname",
            _connstr_bytes(node, dbname),
            "--username",
            username,
        ]
    )
    latin1_env = {"LC_ALL": "C", "PGCLIENTENCODING": "LATIN1"}
    node.bin.command_ok(argv, msg, extra_env=latin1_env)
"template1", + "--jobs", + "2", + "--username", + username1, + dirfmt, + ], + "parallel restore", + extra_env=env, + ) + node.bin.command_ok( + ["dropdb", "--username", _SRC_SUPER, dbname1], "dropdb", extra_env=env + ) + node.bin.command_ok( + [ + "pg_restore", + "--create", + "--verbose", + "--dbname", + "template1", + "--jobs", + "2", + "--username", + username1, + dirfmt, + ], + "parallel restore with create", + extra_env=env, + ) + + +def test_010_dump_connstr(create_pg, pg_bin): + """Full-range LATIN1 db/role names round-trip through dump/restore.""" + dbnames, usernames = _build_names() + dbname1 = dbnames[0] + + node = create_pg( + "main", + start=False, + extra=["--username", _SRC_SUPER, "--locale", "C", "--encoding", "LATIN1"], + ) + _config_auth_roles(node, _SRC_SUPER, usernames) + node.start() + + backupdir = str(node.backup_dir) + discard = backupdir + "/discard.sql" + plain = backupdir + "/plain.sql" + dirfmt = backupdir + "/dirfmt" + + for dbname, username in zip(dbnames, usernames): + _create_db_and_super(node, dbname, username) + + # pg_dumpall --roles-only because it produces a short dump; cross dbname and + # username so each long name is used as both a connection db and a user. 
+ _dumpall_roles_only( + node, dbnames[0], usernames[3], False, "long ASCII name 1", discard + ) + _dumpall_roles_only( + node, dbnames[1], usernames[2], True, "long ASCII name 2", discard + ) + _dumpall_roles_only( + node, dbnames[2], usernames[1], True, "long ASCII name 3", discard + ) + _dumpall_roles_only( + node, dbnames[3], usernames[0], True, "long ASCII name 4", discard + ) + node.bin.command_ok( + [ + "pg_dumpall", + "--no-sync", + "--roles-only", + "--username", + _SRC_SUPER, + "--dbname", + "dbname=template1", + ], + "pg_dumpall --dbname accepts connection string", + extra_env={"LC_ALL": "C", "PGCLIENTENCODING": "LATIN1"}, + ) + + _parallel_dump_restore(node, dbname1, usernames[0], dirfmt) + + node.bin.command_ok( + [ + "pg_dumpall", + "--no-sync", + "--file", + plain, + "--username", + usernames[0], + ], + "take full dump", + extra_env={"LC_ALL": "C", "PGCLIENTENCODING": "LATIN1"}, + ) + + restore_super = "regress_a'b\\c=d\\ne\"f" + + # Restore through psql using environment variables for connection params. + envar_node = _restore_full_dump( + create_pg, "destination_envar", plain, restore_super + ) + result = pg_bin.result( + ["psql", "--no-psqlrc", "--file", plain], + extra_env=_envar_restore_env(envar_node, restore_super), + ) + assert ( + result.rc == 0 + ), "restore full dump using environment variables for connection parameters" + assert result.stderr == "", "no dump errors" + + # Restore through psql using command-line connection params. 
def _envar_restore_env(node, restore_super):
    """Return the environment for a restore driven purely by PG* variables.

    Host and port come from node, the user is restore_super and the database
    is "postgres"; LC_ALL=C and PGCLIENTENCODING=LATIN1 are set as well.
    """
    env = {"LC_ALL": "C", "PGCLIENTENCODING": "LATIN1"}
    env["PGHOST"] = str(node.host)
    env["PGPORT"] = str(node.port)
    env["PGUSER"] = restore_super
    env["PGDATABASE"] = "postgres"
    return env
def test_001_basic(pg_bin):
    """pg_upgrade handles --help, --version and invalid options."""
    program = "pg_upgrade"
    pg_bin.program_help_ok(program)
    pg_bin.program_version_ok(program)
    pg_bin.program_options_handling_ok(program)
def _generate_db(old, prefix, from_char, to_char, suffix):
    """Create a database named prefix + chr(from_char..to_char) + suffix.

    Mirrors the Perl generate_db: the code points for BEL (7), LF (10) and
    CR (13) are left out of the name.  createdb is run through old.bin and
    must succeed.
    """
    skipped = (7, 10, 13)  # BEL, LF, and CR
    middle = "".join(
        chr(code) for code in range(from_char, to_char + 1) if code not in skipped
    )
    description = "created database with ASCII characters from {} to {}".format(
        from_char, to_char
    )
    old.bin.command_ok(["createdb", prefix + middle + suffix], description)
def _init_old_node(create_pg):
    """Initialize, configure and start the old node; return it.

    The node is created as "old_node" with the initdb parameters from
    _old_node_params(), so it always agrees with the other node built from
    those parameters.  Those parameters set up the locale of the original
    cluster so the test can later check that pg_upgrade copies template0's
    locale from the old to the new cluster.
    """
    old = create_pg("old_node", start=False, extra=_old_node_params())
    # Override log_statement=all set by Cluster.pm to avoid log traffic that
    # slows this test down, and run the regression tests at the same wal_level
    # as 'make check'.
    old.append_conf("log_statement = none")
    old.append_conf("wal_level = replica")
    old.start()
    return old
def _maybe_regress_dump_restore(create_pg, pg_bin, old, tempdir):
    """Round-trip the regression database through dump/restore and compare.

    Does nothing (returns None immediately) unless the whole word
    regress_dump_restore appears in the PG_TEST_EXTRA environment variable,
    mirroring the SKIP block in the Perl original.  (The "different versions"
    and "non-default install" skips never apply in this same-version port.)
    This is one section of the larger test, so an unset extra returns early
    rather than skipping the whole test.

    create_pg -- node factory used to build the destination node "dst_node".
    pg_bin    -- command runner used for pg_dump on the source instance.
    old       -- the running source node holding the "regression" database.
    tempdir   -- directory that receives the dump files.
    """
    extra = os.environ.get("PG_TEST_EXTRA", "")
    if not re.search(r"\bregress_dump_restore\b", extra):
        return

    # The destination uses the same initdb parameters as the old node.
    dstnode = create_pg("dst_node", start=False, extra=_old_node_params())
    dstnode.append_conf("log_statement = none")
    dstnode.append_conf("autovacuum = off")
    dstnode.start()

    # Use --create so the restored database keeps the source's configurable
    # settings (avoids locale-driven dump differences) and to cover --create.
    # Use directory format for parallel dump/restore.
    dump_file = os.path.join(tempdir, "regression.dump")
    pg_bin.command_ok(
        [
            "pg_dump",
            "-Fd",
            "-j2",
            "--no-sync",
            "-d",
            old.connstr("regression"),
            "--create",
            "-f",
            dump_file,
        ],
        "pg_dump on source instance",
    )
    dstnode.bin.command_ok(
        ["pg_restore", "--create", "-j2", "-d", "postgres", dump_file],
        "pg_restore to destination instance",
    )

    # Only the dump taken from the original database gets its child-table
    # columns adjusted; the flag is a boolean, so pass True/False rather than
    # the Perl-style 1/0.
    src_dump = _get_dump_for_comparison(old, "regression", "src_dump", True, tempdir)
    dst_dump = _get_dump_for_comparison(
        dstnode, "regression", "dest_dump", False, tempdir
    )
    pypg.compare_files(
        src_dump,
        dst_dump,
        "dump outputs from original and restored regression databases match",
    )
def _check_phase(pg_bin, old, new, oldbindir, newbindir):
    """Run the two failing pg_upgrade --check cases and clean up after each.

    Case 1: a non-existent old bindir makes pg_upgrade fail at the very
    start; the logging directory pg_upgrade_output.d must still exist
    afterwards.
    Case 2: pg_upgrade --check aborts on an invalid database, which the
    caller is expected to have created in the old cluster beforehand; the
    output must mention datconnlimit.

    pg_upgrade_output.d is removed after each case so the next run starts
    clean.  The successful --check run is not part of this helper; it is done
    by _final_check_and_upgrade.

    pg_bin               -- command runner providing command_checks_all.
    old, new             -- the (stopped) old and new nodes.
    oldbindir, newbindir -- binary directories passed to pg_upgrade.
    """
    output_dir = _output_dir(new)

    # Cause a failure at the very start of pg_upgrade; this should create the
    # logging directory pg_upgrade_output.d but leave it around.  --check is
    # passed so that pg_upgrade exits early.
    pg_bin.command_checks_all(
        _pg_upgrade_cmd(old, new, oldbindir + "/does/not/exist/", newbindir, "--check"),
        1,
        [r'check for ".*?does/not/exist" failed'],
        [],
        "run of pg_upgrade --check for new instance with incorrect binary path",
    )
    assert os.path.isdir(
        output_dir
    ), "pg_upgrade_output.d/ not removed after pg_upgrade failure"
    shutil.rmtree(output_dir)

    # pg_upgrade aborts when it encounters an invalid database.
    pg_bin.command_checks_all(
        _pg_upgrade_cmd(old, new, oldbindir, newbindir, "--check"),
        1,
        [r"datconnlimit"],
        [r"^$"],
        "invalid database causes failure",
    )
    shutil.rmtree(output_dir)
def _tmp_check():
    """Create a brand-new scratch directory and return its path.

    Every call makes another directory whose name starts with
    ``pgupgrade_regress_``; nothing in this helper removes it afterwards.
    """
    # Function-scope import, as before, so the module header stays untouched.
    from tempfile import mkdtemp  # pylint: disable=import-outside-toplevel

    scratch_dir = mkdtemp(prefix="pgupgrade_regress_")
    return scratch_dir
def _find_under(directory, needle):
    """Return the path of the first file under directory whose name contains needle.

    Mirrors the File::Find::find usage in the Perl original: the output
    directory contains a milliseconds timestamp, so the file's path cannot be
    predicted and must be discovered by walking the tree.

    The match is a plain substring test on the file name (not a regex).
    Directories and files are visited in sorted order, so when several files
    match, the same one is returned on every platform instead of whichever
    the filesystem happens to list first.

    Returns None when nothing matches or when directory does not exist.
    """
    for dirpath, dirs, files in os.walk(directory):
        # os.walk (top-down) honours in-place changes to the directory list,
        # so sorting it here fixes the order in which subtrees are visited.
        dirs.sort()
        for name in sorted(files):
            if needle in name:
                return os.path.join(dirpath, name)
    return None
def _check_unconsumed_wal(pg_bin, oldpub, newpub, mode):
    """TEST: pg_upgrade fails when a slot still has unconsumed WAL records.

    Prepares the old cluster, runs pg_upgrade expecting exit status 1, then
    inspects the invalid_logical_slots.txt report found under the new
    cluster's pg_upgrade_output.d directory.
    """
    # Preparation, executed in this order on the old cluster:
    #   1. Generate extra WAL records; no slot has consumed them yet.
    #   2. Advance test_slot2 to the current WAL location; test_slot1 still
    #      has unconsumed WAL records.
    #   3. Emit a non-transactional message, which makes test_slot2 see an
    #      unconsumed WAL record again.
    #   4. Advance test_slot3 to the current WAL location.
    advance_sql = "SELECT pg_replication_slot_advance('{}', pg_current_wal_lsn());"
    statements = [
        "CREATE TABLE tbl AS SELECT generate_series(1, 10) AS a;",
        advance_sql.format("test_slot2"),
        "SELECT count(*) FROM pg_logical_emit_message('false', 'prefix', "
        "'This is a non-transactional message', true);",
        advance_sql.format("test_slot3"),
    ]
    oldpub.start()
    oldpub.safe_psql("\n".join(statements))
    oldpub.stop()

    # The upgrade must be refused: some slots still have unconsumed WAL.
    expected_stdout = [
        r"Your installation contains logical replication slots that cannot "
        r"be upgraded\."
    ]
    pg_bin.command_checks_all(
        _build_pg_upgrade_cmd(oldpub, newpub, mode),
        1,
        expected_stdout,
        [r""],
        "run of pg_upgrade of old cluster with slots having unconsumed WAL records",
    )

    # The report's path is unpredictable (the output directory name carries a
    # milliseconds timestamp), so search for it.
    upgrade_output = os.path.join(newpub.datadir, "pg_upgrade_output.d")
    slots_filename = _find_under(upgrade_output, "invalid_logical_slots.txt")
    assert slots_filename is not None, "invalid_logical_slots.txt not found"

    # test_slot1 and test_slot2 must both be reported as lagging, while
    # test_slot3, which has caught up, must not appear at all.
    contents = pypg.slurp_file(slots_filename)
    for lagging in ("test_slot1", "test_slot2"):
        pattern = r'The slot "{}" has not consumed the WAL yet'.format(lagging)
        assert re.search(
            pattern, contents, re.MULTILINE
        ), "the previous test failed due to unconsumed WALs"
    assert not re.search(
        r"test_slot3", contents, re.MULTILINE
    ), "caught-up slot is not reported"
Temporarily disable the subscription + sub.safe_psql("ALTER SUBSCRIPTION regress_sub DISABLE") + oldpub.stop() + + # pg_upgrade should be successful + pg_bin.command_ok( + _build_pg_upgrade_cmd(oldpub, newpub, mode), + "run of pg_upgrade of old cluster", + ) + + # Check that the slot 'regress_sub' has migrated to the new cluster + newpub.start() + result = newpub.safe_psql( + "SELECT slot_name, two_phase, failover FROM pg_replication_slots" + ) + assert result == "regress_sub|t|t", "check the slot exists on new cluster" + + # Update the connection + new_connstr = newpub.connstr() + " dbname=postgres" + sub.safe_psql( + "ALTER SUBSCRIPTION regress_sub CONNECTION '{}';\n" + "ALTER SUBSCRIPTION regress_sub ENABLE;".format(new_connstr) + ) + + # Check whether changes on the new publisher get replicated to the subscriber + newpub.safe_psql("INSERT INTO tbl VALUES (generate_series(11, 20))") + newpub.wait_for_catchup("regress_sub") + result = sub.safe_psql("SELECT count(*) FROM tbl") + assert result == "20", "check changes are replicated to the sub" + + # Clean up + sub.stop() + newpub.stop() + + +def test_003_logical_slots(create_pg, pg_bin, tmp_check, monkeypatch): + """Upgrade logical replication slots across a same-version pg_upgrade.""" + # Can be changed to test the other modes + mode = os.environ.get("PG_TEST_PG_UPGRADE_MODE") or "--copy" + + # Initialize old cluster + oldpub = create_pg("oldpub", start=False, allows_streaming="logical") + oldpub.append_conf("autovacuum = off", "postgresql.conf") + + # Initialize new cluster + newpub = create_pg("newpub", start=False, allows_streaming="logical") + + # During upgrade, when pg_restore performs CREATE DATABASE, bgwriter or + # checkpointer may flush buffers and hold a file handle for the system + # table. 
So, if later due to some reason we need to re-create the file with + # the same name like a TRUNCATE command on the same table, then the command + # will fail if OS (such as older Windows versions) doesn't remove an + # unlinked file completely till it is open. The probability of seeing this + # behavior is higher in this test because we use wal_level as logical via + # allows_streaming => 'logical' which in turn set shared_buffers as 1MB. + newpub.append_conf( + "bgwriter_lru_maxpages = 0\ncheckpoint_timeout = 1h", + "postgresql.conf", + ) + + # In a VPATH build, we'll be started in the source directory, but we want to + # run pg_upgrade in the build directory so that any files generated finish + # in it, like delete_old_cluster.{sh,bat}. + monkeypatch.chdir(tmp_check) + + _check_insufficient_max_replication_slots(pg_bin, oldpub, newpub, mode) + _check_unconsumed_wal(pg_bin, oldpub, newpub, mode) + _check_successful_upgrade(create_pg, pg_bin, oldpub, newpub, mode) diff --git a/src/bin/pg_upgrade/pyt/test_004_subscription.py b/src/bin/pg_upgrade/pyt/test_004_subscription.py new file mode 100644 index 0000000000000..7d5ca90ee7130 --- /dev/null +++ b/src/bin/pg_upgrade/pyt/test_004_subscription.py @@ -0,0 +1,446 @@ +# Copyright (c) 2023-2026, PostgreSQL Global Development Group + +"""Port of src/bin/pg_upgrade/t/004_subscription.pl. + +Test for pg_upgrade of logical subscription. Note that after the successful +upgrade test, we can't use the old cluster to prevent failing in --link mode. 
def _check_cmd(old_sub, new_sub, oldbindir, newbindir, mode):
    """Build a pg_upgrade --check command for the old/new subscriber pair.

    This is exactly the _upgrade_cmd() argument list with "--check" appended,
    so the two command lines cannot diverge.
    """
    return _upgrade_cmd(old_sub, new_sub, oldbindir, newbindir, mode) + ["--check"]


def _upgrade_cmd(old_sub, new_sub, oldbindir, newbindir, mode):
    """Build a (non --check) pg_upgrade command for the old/new subscriber.

    old_sub, new_sub     -- nodes supplying datadir, port and (new only) host.
    oldbindir, newbindir -- binary directories for each cluster.
    mode                 -- transfer-mode switch placed last, e.g. "--copy".

    Returns the argument list; datadir, host and port values are passed
    through str().
    """
    return [
        "pg_upgrade",
        "--no-sync",
        "--old-datadir",
        str(old_sub.datadir),
        "--new-datadir",
        str(new_sub.datadir),
        "--old-bindir",
        oldbindir,
        "--new-bindir",
        newbindir,
        "--socketdir",
        str(new_sub.host),
        "--old-port",
        str(old_sub.port),
        "--new-port",
        str(new_sub.port),
        mode,
    ]


def _find_under(directory, needle):
    """Return the path of the first file under directory whose name contains needle.

    Mirrors the File::Find::find usage in the Perl original; the output
    directory contains a milliseconds timestamp so the path can't be predicted.

    The match is a plain substring test on the file name.  Directories and
    files are visited in sorted order so the result is deterministic when
    several files match.  Returns None when nothing matches or when directory
    does not exist.
    """
    for dirpath, dirs, files in os.walk(directory):
        # Sorting in place fixes the order in which os.walk descends.
        dirs.sort()
        for name in sorted(files):
            if needle in name:
                return os.path.join(dirpath, name)
    return None
+ pub.safe_psql("CREATE PUBLICATION regress_pub1") + old_sub.safe_psql( + "CREATE SUBSCRIPTION regress_sub1 CONNECTION '{}' " + "PUBLICATION regress_pub1 WITH (enabled = false)".format(connstr) + ) + old_sub.stop() + new_sub.append_conf("max_active_replication_origins = 0", "postgresql.conf") + + pg_bin.command_checks_all( + _check_cmd(old_sub, new_sub, oldbindir, newbindir, mode), + 1, + [ + r'"max_active_replication_origins" \(0\) must be greater than or ' + r"equal to the number of subscriptions \(1\) on the old cluster" + ], + [r""], + "run of pg_upgrade where the new cluster has insufficient " + "max_active_replication_origins", + ) + + # Reset max_active_replication_origins + new_sub.append_conf("max_active_replication_origins = 10", "postgresql.conf") + + # Cleanup + pub.safe_psql("DROP PUBLICATION regress_pub1") + old_sub.start() + old_sub.safe_psql("DROP SUBSCRIPTION regress_sub1;") + + +def _check_insufficient_slots(pg_bin, pub, old_sub, new_sub, dirs, mode): + """pg_upgrade fails when max_replication_slots is too small. + + The new cluster needs at least the number of logical slots on the old + cluster plus one for retaining conflict-detection information when a + subscription enables retain_dead_tuples. 
def _setup_invalid_states(pub, old_sub, connstr):
    """Leave the old subscriber with two conditions pg_upgrade must reject.

    regress_sub2 ends up with a table stuck in 'd' (datasync) state, and
    regress_sub3 loses its replication origin.  The old subscriber is stopped
    before returning.

    pub, old_sub -- publisher and old subscriber nodes.
    connstr      -- publisher connection string embedded in the subscriptions.
    """
    # Both sides get the same table already holding id = 1, so the
    # subscriber's initial table sync hits a primary key violation and fails.
    table_ddl = (
        "CREATE TABLE tab_primary_key(id serial PRIMARY KEY);\n"
        "INSERT INTO tab_primary_key values(1);\n"
    )
    pub.safe_psql(
        table_ddl + "CREATE PUBLICATION regress_pub2 FOR TABLE tab_primary_key;"
    )
    create_sub2 = (
        "CREATE SUBSCRIPTION regress_sub2 CONNECTION '{}' "
        "PUBLICATION regress_pub2;"
    ).format(connstr)
    old_sub.safe_psql(table_ddl + create_sub2)

    # Wait for the table to be reported in 'd' (data is being copied) state.
    in_datasync = old_sub.poll_query_until(
        "SELECT count(1) = 1 FROM pg_subscription_rel WHERE srsubstate = 'd'"
    )
    assert (
        in_datasync
    ), "Timed out while waiting for the table state to become 'd' (datasync)"

    # Second subscription (disabled), whose replication origin is dropped.
    pub.safe_psql("CREATE PUBLICATION regress_pub3")
    create_sub3 = (
        "CREATE SUBSCRIPTION regress_sub3 CONNECTION '{}' "
        "PUBLICATION regress_pub3 WITH (enabled = false)"
    ).format(connstr)
    old_sub.safe_psql(create_sub3)
    origin_name = "pg_" + old_sub.safe_psql(
        "SELECT oid FROM pg_subscription WHERE subname = 'regress_sub3'"
    )
    old_sub.safe_psql("SELECT pg_replication_origin_drop('{}')".format(origin_name))
    old_sub.stop()
def _setup_ready_and_init(pub, old_sub, connstr):
    """Prepare subscriptions with tables in ready and init state before upgrade.

    Returns a pair (remote_lsn, oids): remote_lsn is the replication origin's
    remote_lsn for regress_sub4 on the old subscriber, and oids holds the
    pg_class OIDs of tab_upgraded, tab_upgraded1 and tab_upgraded2 (in that
    order) on the old subscriber.  The old subscriber is stopped on return.
    """
    # Two tables are used to verify deterministic pg_dump ordering of
    # subscription relations during --binary-upgrade.
    ready_tables = (
        "CREATE TABLE tab_upgraded(id int);\n"
        "CREATE TABLE tab_upgraded1(id int);\n"
    )
    pub.safe_psql(
        ready_tables
        + "CREATE PUBLICATION regress_pub4 FOR TABLE tab_upgraded, tab_upgraded1;"
    )
    create_sub4 = (
        "CREATE SUBSCRIPTION regress_sub4 CONNECTION '{}' "
        "PUBLICATION regress_pub4 WITH (failover = true, "
        "retain_dead_tuples = true);"
    ).format(connstr)
    old_sub.safe_psql(ready_tables + create_sub4)

    # Block until both tab_upgraded and tab_upgraded1 are in 'ready' state.
    both_ready = old_sub.poll_query_until(
        "SELECT count(1) = 2 FROM pg_subscription_rel WHERE srsubstate = 'r'"
    )
    assert both_ready, "Timed out while waiting for the table to reach ready state"

    pub.safe_psql("INSERT INTO tab_upgraded1 VALUES (generate_series(1,50))")
    pub.wait_for_catchup("regress_sub4")

    # With no logical replication workers available, the table of the next
    # subscription stays in init state.
    old_sub.append_conf("max_logical_replication_workers = 0", "postgresql.conf")
    old_sub.restart()

    init_table = "CREATE TABLE tab_upgraded2(id int);\n"
    pub.safe_psql(
        init_table + "CREATE PUBLICATION regress_pub5 FOR TABLE tab_upgraded2;"
    )
    create_sub5 = (
        "CREATE SUBSCRIPTION regress_sub5 CONNECTION '{}' "
        "PUBLICATION regress_pub5;"
    ).format(connstr)
    old_sub.safe_psql(init_table + create_sub5)

    in_init = old_sub.safe_psql(
        "SELECT count(1) = 1 FROM pg_subscription_rel WHERE srsubstate = 'i'"
    )
    assert in_init == "t", "Check that the table is in init state"

    # Remember the origin's remote_lsn so it can be compared after the upgrade.
    remote_lsn = old_sub.safe_psql(
        "SELECT remote_lsn FROM pg_replication_origin_status os, "
        "pg_subscription s WHERE os.external_id = 'pg_' || s.oid "
        "AND s.subname = 'regress_sub4'"
    )
    # regress_sub5 goes into the upgrade disabled.
    old_sub.safe_psql("ALTER SUBSCRIPTION regress_sub5 DISABLE")

    oid_query = "SELECT oid FROM pg_class WHERE relname = '{}'"
    oids = tuple(
        old_sub.safe_psql(oid_query.format(relname))
        for relname in ("tab_upgraded", "tab_upgraded1", "tab_upgraded2")
    )
    old_sub.stop()
    return remote_lsn, oids
def _verify_resumed_sync(pub, new_sub):
    """Re-enable table sync on the new subscriber and check the row counts.

    Raises max_logical_replication_workers to 10, restarts the subscriber,
    enables regress_sub5 and waits for both subscriptions before counting the
    rows of tab_upgraded1 and tab_upgraded2.
    """
    new_sub.append_conf("max_logical_replication_workers = 10", "postgresql.conf")
    new_sub.restart()
    new_sub.safe_psql("ALTER SUBSCRIPTION regress_sub5 ENABLE")
    new_sub.wait_for_subscription_sync(pub, "regress_sub5")

    # regress_sub4 has to catch up as well before the counts are meaningful.
    pub.wait_for_catchup("regress_sub4")

    # (table, expected count(*), assertion message), checked in this order.
    expectations = (
        ("tab_upgraded1", "51", "check replicated inserts on new subscriber"),
        (
            "tab_upgraded2",
            "1",
            "check the data is synced after enabling the subscription for the "
            "table that was in init state",
        ),
    )
    for table, expected_count, message in expectations:
        row_count = new_sub.safe_psql("SELECT count(*) FROM {}".format(table))
        assert row_count == expected_count, message
+ connstr = publisher.connstr() + " dbname=postgres" + dirs = (connstr, oldbindir, newbindir) + + _check_insufficient_origins(pg_bin, publisher, old_sub, new_sub, dirs, mode) + _check_insufficient_slots(pg_bin, publisher, old_sub, new_sub, dirs, mode) + _check_invalid_relstate(pg_bin, publisher, old_sub, new_sub, dirs, mode) + + remote_lsn, oids = _setup_ready_and_init(publisher, old_sub, connstr) + + # Change configuration so that initial table sync does not get started + # automatically + new_sub.append_conf("max_logical_replication_workers = 0", "postgresql.conf") + + # Check that pg_upgrade is successful when all tables are in ready or in + # init state (tab_upgraded and tab_upgraded1 tables are in ready state and + # tab_upgraded2 table is in init state) along with retaining the + # replication origin's remote lsn, subscription's running status, failover + # option, and retain_dead_tuples option. + pg_bin.command_ok( + _upgrade_cmd(old_sub, new_sub, oldbindir, newbindir, mode), + "run of pg_upgrade for old instance when the subscription tables are " + "in init/ready state", + ) + assert not os.path.isdir( + os.path.join(new_sub.datadir, "pg_upgrade_output.d") + ), "pg_upgrade_output.d/ removed after successful pg_upgrade" + + _verify_upgraded_state(publisher, new_sub, remote_lsn, oids) + _verify_resumed_sync(publisher, new_sub) diff --git a/src/bin/pg_upgrade/pyt/test_005_char_signedness.py b/src/bin/pg_upgrade/pyt/test_005_char_signedness.py new file mode 100644 index 0000000000000..23563bda37522 --- /dev/null +++ b/src/bin/pg_upgrade/pyt/test_005_char_signedness.py @@ -0,0 +1,92 @@ +# Copyright (c) 2024-2026, PostgreSQL Global Development Group + +"""Port of src/bin/pg_upgrade/t/005_char_signedness.pl. + +pg_upgrade propagates the default char data signedness from the old cluster. 
+After flipping the old cluster to 'unsigned' via pg_resetwal, pg_upgrade rejects +--set-char-signedness (it cannot be used upgrading from v18+) and, on a plain +run, the upgraded new cluster inherits the old cluster's unsigned signedness. +""" + +import os + + +def test_005_char_signedness(create_pg, pg_bin, tmp_check, monkeypatch): + """Char signedness is carried from the old cluster through pg_upgrade.""" + mode = os.environ.get("PG_TEST_PG_UPGRADE_MODE") or "--copy" + old = create_pg("old", start=False) + new = create_pg("new", start=False) + pg_bin.command_like( + ["pg_controldata", old.datadir], + r"Default char data signedness:\s+signed", + "default char signedness of old cluster is signed in control file", + ) + pg_bin.command_like( + ["pg_controldata", new.datadir], + r"Default char data signedness:\s+signed", + "default char signedness of new cluster is signed in control file", + ) + pg_bin.command_ok( + ["pg_resetwal", "--char-signedness", "unsigned", "--force", old.datadir], + "set old cluster's default char signedness to unsigned", + ) + pg_bin.command_like( + ["pg_controldata", old.datadir], + r"Default char data signedness:\s+unsigned", + "updated default char signedness is unsigned in control file", + ) + monkeypatch.chdir(tmp_check) + pg_bin.command_checks_all( + [ + "pg_upgrade", + "--no-sync", + "--old-datadir", + old.datadir, + "--new-datadir", + new.datadir, + "--old-bindir", + old.config_data("--bindir"), + "--new-bindir", + new.config_data("--bindir"), + "--socketdir", + new.host, + "--old-port", + str(old.port), + "--new-port", + str(new.port), + "--set-char-signedness", + "signed", + mode, + ], + 1, + [r"option --set-char-signedness cannot be used"], + [], + "--set-char-signedness option cannot be used for upgrading from v18 or later", + ) + pg_bin.command_ok( + [ + "pg_upgrade", + "--no-sync", + "--old-datadir", + old.datadir, + "--new-datadir", + new.datadir, + "--old-bindir", + old.config_data("--bindir"), + "--new-bindir", + 
new.config_data("--bindir"), + "--socketdir", + new.host, + "--old-port", + str(old.port), + "--new-port", + str(new.port), + mode, + ], + "run of pg_upgrade", + ) + pg_bin.command_like( + ["pg_controldata", new.datadir], + r"Default char data signedness:\s+unsigned", + "the default char signedness is updated during pg_upgrade", + ) diff --git a/src/bin/pg_upgrade/pyt/test_006_transfer_modes.py b/src/bin/pg_upgrade/pyt/test_006_transfer_modes.py new file mode 100644 index 0000000000000..a0ea27881fac1 --- /dev/null +++ b/src/bin/pg_upgrade/pyt/test_006_transfer_modes.py @@ -0,0 +1,209 @@ +# Copyright (c) 2025-2026, PostgreSQL Global Development Group + +"""Port of src/bin/pg_upgrade/t/006_transfer_modes.pl. + +Tests for file transfer modes. + +This is a same-version port: old and new clusters are both built from this tree +(v19+), so $ENV{oldinstall} is never set. The non-in-place tablespace tests +(gated on oldinstall in the Perl original) are therefore not exercised; the +in-place tablespace tests (gated on old pg_version >= 10) do run. The +old-version-too-old skip for --swap (pg_version < 10) likewise never applies. +""" + +_NOT_SUPPORTED_RE = ( + r".* not supported on this platform" + r"|could not .* between old and new data directories: .*" +) + + +def _create_test_objects(old): + """Create a small variety of simple test objects on the old cluster. + + We'll check that these reach the new version after upgrading. Includes the + in-place tablespace objects (old pg_version >= 10 in the same-version port) + and large objects. + """ + old.start() + old.safe_psql("CREATE TABLE test1 AS SELECT generate_series(1, 100)") + old.safe_psql("CREATE DATABASE testdb1") + old.safe_psql("CREATE TABLE test2 AS SELECT generate_series(200, 300)", "testdb1") + old.safe_psql("VACUUM FULL test2", "testdb1") + old.safe_psql("CREATE SEQUENCE testseq START 5432", "testdb1") + + # In-place tablespaces (available as far back as v10). 
+ old.safe_psql("CREATE TABLESPACE inplc_tblspc LOCATION ''") + old.safe_psql("CREATE DATABASE testdb3 TABLESPACE inplc_tblspc") + old.safe_psql( + "CREATE TABLE test5 TABLESPACE inplc_tblspc " + "AS SELECT generate_series(503, 606)" + ) + old.safe_psql("CREATE TABLE test6 AS SELECT generate_series(607, 711)", "testdb3") + + # While we are here, test handling of large objects. + old.safe_psql( + r""" + CREATE ROLE regress_lo_1; + CREATE ROLE regress_lo_2; + + SELECT lo_from_bytea(4532, '\xffffff00'); + COMMENT ON LARGE OBJECT 4532 IS 'test'; + + SELECT lo_from_bytea(4533, '\x0f0f0f0f'); + ALTER LARGE OBJECT 4533 OWNER TO regress_lo_1; + GRANT SELECT ON LARGE OBJECT 4533 TO regress_lo_2; + """ + ) + + +def _create_seclabel_objects(old): + """Create the dummy_seclabel extension and a labelled large object.""" + old.safe_psql( + r""" + CREATE EXTENSION dummy_seclabel; + + SELECT lo_from_bytea(4534, '\x00ffffff'); + SECURITY LABEL ON LARGE OBJECT 4534 IS 'classified'; + """ + ) + + +def _verify_test_objects(new, mode): + """Verify the simple test objects reached the new version after upgrade.""" + assert ( + new.safe_psql("SELECT COUNT(*) FROM test1") == "100" + ), "test1 data after pg_upgrade {}".format(mode) + assert ( + new.safe_psql("SELECT COUNT(*) FROM test2", "testdb1") == "101" + ), "test2 data after pg_upgrade {}".format(mode) + assert ( + new.safe_psql("SELECT nextval('testseq')", "testdb1") == "5432" + ), "sequence data after pg_upgrade {}".format(mode) + + # In-place tablespaces. 
+ assert ( + new.safe_psql("SELECT COUNT(*) FROM test5") == "104" + ), "test5 data after pg_upgrade {}".format(mode) + assert ( + new.safe_psql("SELECT COUNT(*) FROM test6", "testdb3") == "105" + ), "test6 data after pg_upgrade {}".format(mode) + + +def _verify_large_objects(new): + """Verify large-object contents, owner and ACL reached the new version.""" + assert ( + new.safe_psql("SELECT lo_get(4532)") == r"\xffffff00" + ), "LO contents after upgrade" + assert ( + new.safe_psql("SELECT obj_description(4532, 'pg_largeobject')") == "test" + ), "comment on LO after pg_upgrade" + + assert ( + new.safe_psql("SELECT lo_get(4533)") == r"\x0f0f0f0f" + ), "LO contents after upgrade" + assert ( + new.safe_psql( + "SELECT lomowner::regrole FROM pg_largeobject_metadata WHERE oid = 4533" + ) + == "regress_lo_1" + ), "LO owner after upgrade" + assert ( + new.safe_psql("SELECT lomacl FROM pg_largeobject_metadata WHERE oid = 4533") + == "{regress_lo_1=rw/regress_lo_1,regress_lo_2=r/regress_lo_1}" + ), "LO ACL after upgrade" + + +def _verify_seclabel(new): + """Verify the security label on the labelled large object after upgrade.""" + assert ( + new.safe_psql("SELECT lo_get(4534)") == r"\x00ffffff" + ), "LO contents after upgrade" + result = new.safe_psql( + "SELECT label FROM pg_seclabel WHERE objoid = 4534 " + "AND classoid = 'pg_largeobject'::regclass" + ) + assert result == "classified", "seclabel on LO after pg_upgrade" + + +def _pg_upgrade_cmd(old, new, mode): + """Build the pg_upgrade command line for the given transfer mode.""" + return [ + "pg_upgrade", + "--no-sync", + "--old-datadir", + str(old.datadir), + "--new-datadir", + str(new.datadir), + "--old-bindir", + old.config_data("--bindir"), + "--new-bindir", + new.config_data("--bindir"), + "--socketdir", + str(new.host), + "--old-port", + str(old.port), + "--new-port", + str(new.port), + mode, + ] + + +def _test_mode(create_pg, pg_bin, mode, index): + """Run pg_upgrade once with the given transfer mode and verify the 
result. + + index disambiguates per-mode node names within the single test process, + standing in for the Perl test's fixed 'old'/'new' names (one mode per run). + """ + old = create_pg("old{}".format(index), start=False) + new = create_pg("new{}".format(index), start=False) + + # allow_in_place_tablespaces is available as far back as v10. + new.append_conf("allow_in_place_tablespaces = true", "postgresql.conf") + old.append_conf("allow_in_place_tablespaces = true", "postgresql.conf") + + # We can only test security labels if both the old and new installations + # have dummy_seclabel. + test_seclabel = True + old.start() + if not old.check_extension("dummy_seclabel"): + test_seclabel = False + old.stop() + new.start() + if not new.check_extension("dummy_seclabel"): + test_seclabel = False + new.stop() + + _create_test_objects(old) + if test_seclabel: + _create_seclabel_objects(old) + old.stop() + + result = pg_bin.command_ok_or_fails_like( + _pg_upgrade_cmd(old, new, mode), + _NOT_SUPPORTED_RE, + r"^$", + "pg_upgrade with transfer mode {}".format(mode), + ) + + # If pg_upgrade was successful, check that all of our test objects reached + # the new version. + if result: + new.start() + _verify_test_objects(new, mode) + _verify_large_objects(new) + if test_seclabel: + _verify_seclabel(new) + new.stop() + + +def test_006_transfer_modes(create_pg, pg_bin, tmp_check, monkeypatch): + """Exercise every pg_upgrade transfer mode on a same-version upgrade.""" + # Run pg_upgrade in tmp_check to avoid leaving files like + # delete_old_cluster.{sh,bat} in the source directory for VPATH and meson + # builds. 
+ tmp_check.mkdir(parents=True, exist_ok=True) + monkeypatch.chdir(tmp_check) + + modes = ["--clone", "--copy", "--copy-file-range", "--link", "--swap"] + for index, mode in enumerate(modes): + _test_mode(create_pg, pg_bin, mode, index) diff --git a/src/bin/pg_upgrade/pyt/test_007_multixact_conversion.py b/src/bin/pg_upgrade/pyt/test_007_multixact_conversion.py new file mode 100644 index 0000000000000..63af18585161f --- /dev/null +++ b/src/bin/pg_upgrade/pyt/test_007_multixact_conversion.py @@ -0,0 +1,291 @@ +# Copyright (c) 2025-2026, PostgreSQL Global Development Group + +"""Port of src/bin/pg_upgrade/t/007_multixact_conversion.pl. + +Version 19 expanded MultiXactOffset from 32 to 64 bits. Upgrading across that +requires rewriting the SLRU files to the new format. This file contains tests +for the conversion. + +To run, set 'oldinstall' ENV variable to point to a pre-v19 installation. If +it's not set, or if it points to a v19 or above installation, this still +performs a very basic test, upgrading a cluster with some multixacts. It's not +very interesting, however, because there's no conversion involved in that case. + +This is a same-version port: old and new clusters are both built from this tree +(v19+), so only the "basic" scenario runs; the "wraparound" scenario requires a +pre-v19 'oldinstall' and is skipped (see test_007_wraparound). +""" + +import os +import re + +import pytest + +import pypg + +_NCLIENTS = 20 +_UPDATE_EVERY = 13 +_ABORT_EVERY = 11 + + +def _read_multixid_fields(pg_bin, node): + """Read multixid related fields from the control file. + + Returns (oldest_multi_xid, next_multi_xid, next_multi_offset) as strings. 
+ """ + result = pg_bin.run_command(["pg_controldata", str(node.datadir)]) + stdout = result.stdout + match = re.search( + r"^Latest checkpoint's oldestMultiXid:\s*(.*)$", stdout, re.MULTILINE + ) + assert match, "could not read oldestMultiXid from pg_controldata" + oldest_multi_xid = match.group(1) + match = re.search( + r"^Latest checkpoint's NextMultiXactId:\s*(.*)$", stdout, re.MULTILINE + ) + assert match, "could not read NextMultiXactId from pg_controldata" + next_multi_xid = match.group(1) + match = re.search( + r"^Latest checkpoint's NextMultiOffset:\s*(.*)$", stdout, re.MULTILINE + ) + assert match, "could not read NextMultiOffset from pg_controldata" + next_multi_offset = match.group(1) + return (oldest_multi_xid, next_multi_xid, next_multi_offset) + + +def _open_workload_connections(node, binnode, connection_timeout_secs): + """Open _NCLIENTS+1 background psql connections, each in a transaction. + + The Perl original borrows the new installation's psql *binary* for + BackgroundPsql feature support but connects every session to the old node + (``connstr => node->connstr``). In this same-version port both binaries are + identical, so the connections are opened against ``node`` directly; binnode + is accepted only to preserve the helper's signature. 
+ """ + del binnode # same-version: node's own psql is used, see docstring + connections = [] + for _ in range(_NCLIENTS + 1): + conn = node.background_psql(timeout=connection_timeout_secs) + conn.query_safe("SET log_statement=none") + conn.query_safe("SET enable_seqscan=off") + conn.query_safe("BEGIN") + connections.append(conn) + return connections + + +def _workload_step_sql(i): + """Return the SQL for one round of the multixid-generating workload.""" + if i % _UPDATE_EVERY == 0: + return ( + "UPDATE mxofftest SET n_updated = n_updated + 1 " + "WHERE id = {} % 50;".format(i) + ) + threshold = int(i / 3000 * 50) + return ( + "select count(*) from (\n" + " SELECT * FROM mxofftest WHERE id >= {} FOR KEY SHARE\n" + ") as x".format(threshold) + ) + + +def _mxact_workload(node, binnode): + """A workload that consumes multixids. + + The purpose of this is to generate some multixids in the old cluster, so + that we can test upgrading them. The workload is a mix of KEY SHARE locking + queries and UPDATEs, and commits and aborts, to generate a mix of multixids + with different statuses. It consumes around 3000 multixids with 60000 + members in total. That's enough to span more than one multixids 'offsets' + page, and more than one 'members' segment with the default block size. + + The workload leaves behind a table called 'mxofftest' containing a small + number of rows referencing some of the generated multixids. + """ + node.start() + node.safe_psql( + "CREATE TABLE mxofftest (id INT PRIMARY KEY, n_updated INT)" + " WITH (AUTOVACUUM_ENABLED=FALSE);\n" + "INSERT INTO mxofftest SELECT G, 0 FROM GENERATE_SERIES(1, 50) G;" + ) + + # Bump the timeout on the connections to avoid false negatives on slow test + # systems. The timeout covers the whole duration that the connections are + # open rather than the individual queries. 
+ connection_timeout_secs = 4 * pypg.test_timeout_default() + connections = _open_workload_connections(node, binnode, connection_timeout_secs) + + # Run queries cycling through the connections in a round-robin fashion. We + # keep a transaction open in each connection at all times, and lock/update + # the rows. With 20 connections, each SELECT FOR KEY SHARE query generates + # a new multixid, containing the XIDs of all the transactions running at the + # time, ie. around 20 XIDs. + for i in range(3000): + conn = connections[i % _NCLIENTS] + conn.query_safe("ABORT" if i % _ABORT_EVERY == 0 else "COMMIT") + conn.query_safe("BEGIN") + conn.query_safe(_workload_step_sql(i)) + + for conn in connections: + conn.quit() + + node.stop() + + +def _get_test_table_contents(node, tempdir, filename): + """Write the 'mxofftest' table contents to a file; return its path.""" + contents = node.safe_psql("SELECT ctid, xmin, xmax, * FROM mxofftest") + path = os.path.join(tempdir, filename) + with open(path, "w", encoding="utf-8") as fh: + fh.write(contents) + return path + + +def _get_updating_multixact_members(node, from_, to, tempdir, filename): + """Write the members of all updating multixids in the given range to a file. + + Returns the file path. + """ + path = os.path.join(tempdir, filename) + with open(path, "w", encoding="utf-8") as fh: + if to >= from_: + res = node.safe_psql( + "SELECT multi, mode, xid\n" + "FROM generate_series({from_}, {to} - 1) as multi,\n" + " pg_get_multixact_members(multi::text::xid)\n" + "WHERE mode not in ('keysh', 'sh');".format(from_=from_, to=to) + ) + fh.write(res) + else: + # Multixids wrapped around. Split the query into two parts, before + # and after the wraparound. 
+ res = node.safe_psql( + "SELECT multi, mode, xid\n" + "FROM generate_series({from_}, 4294967295) as multi,\n" + " pg_get_multixact_members(multi::text::xid)\n" + "WHERE mode not in ('keysh', 'sh');".format(from_=from_) + ) + fh.write(res) + res = node.safe_psql( + "SELECT multi, mode, xid\n" + "FROM generate_series(1, {to} - 1) as multi,\n" + " pg_get_multixact_members(multi::text::xid)\n" + "WHERE mode not in ('keysh', 'sh');".format(to=to) + ) + fh.write(res) + return path + + +def _build_pg_upgrade_cmd(old, new): + """Build the same-version pg_upgrade command (no mode, matching the .pl).""" + return [ + "pg_upgrade", + "--no-sync", + "--old-datadir", + str(old.datadir), + "--new-datadir", + str(new.datadir), + "--old-bindir", + old.config_data("--bindir"), + "--new-bindir", + new.config_data("--bindir"), + "--socketdir", + str(new.host), + "--old-port", + str(old.port), + "--new-port", + str(new.port), + ] + + +def _upgrade_and_compare(pg_bin, tag, oldnode, newnode, tempdir): + """Dump data on old version, run pg_upgrade, compare data after upgrade.""" + pg_bin.command_ok( + _build_pg_upgrade_cmd(oldnode, newnode), + "run of pg_upgrade for new instance", + ) + + # Dump contents of the test table, and the status of all updating multixids + # from the old cluster. (Locking-only multixids don't need to be preserved + # so we ignore those.) + # + # Note: we do this *after* running pg_upgrade, to ensure that we don't set + # all the hint bits before upgrade by doing the SELECT on the table. 
+ multixids_start, multixids_end, _ = _read_multixid_fields(pg_bin, oldnode) + multixids_start = int(multixids_start) + multixids_end = int(multixids_end) + oldnode.start() + old_table_contents = _get_test_table_contents( + oldnode, tempdir, "oldnode_{}_table_contents".format(tag) + ) + old_multixacts = _get_updating_multixact_members( + oldnode, + multixids_start, + multixids_end, + tempdir, + "oldnode_{}_multixacts".format(tag), + ) + oldnode.stop() + + # Compare them with the upgraded cluster + newnode.start() + new_table_contents = _get_test_table_contents( + newnode, tempdir, "newnode_{}_table_contents".format(tag) + ) + new_multixacts = _get_updating_multixact_members( + newnode, + multixids_start, + multixids_end, + tempdir, + "newnode_{}_multixacts".format(tag), + ) + newnode.stop() + + pypg.compare_files( + old_table_contents, + new_table_contents, + "test table contents from original and upgraded clusters match", + ) + pypg.compare_files( + old_multixacts, + new_multixacts, + "multixact members from original and upgraded clusters match", + ) + + +def test_007_basic(create_pg, pg_bin, tmp_check, tmp_path, monkeypatch): + """Basic scenario: create a cluster, run a multixid workload, then upgrade. + + This works even if the old and new version is the same, although it's not + very interesting as the conversion routines only run when upgrading from a + pre-v19 cluster. + """ + tag = "basic" + old = create_pg("{}_oldnode".format(tag), start=False, extra=["-k"]) + new = create_pg("{}_newnode".format(tag), start=False) + + # In a VPATH build, we'll be started in the source directory, but we want to + # run pg_upgrade in the build directory so that any files generated finish + # in it, like delete_old_cluster.{sh,bat}. 
+ monkeypatch.chdir(tmp_check) + + _mxact_workload(old, new) + _upgrade_and_compare(pg_bin, tag, old, new, str(tmp_path)) + + +def test_007_wraparound(): + """Wraparound scenario: requires a pre-v19 'oldinstall' to reset the old + cluster to just before 32-bit offset wraparound using the old file format. + + The same-version port has no pre-v19 old install available, so the SLRU + conversion path cannot be exercised; the Perl original likewise skips this + when the old version is >= 19devel. + """ + if os.environ.get("oldinstall"): + pytest.fail( + "oldinstall is set; wraparound conversion is not ported " + "(needs the pre-v19 file-format reset hacks)" + ) + pytest.skip( + "skipping mxoffset conversion tests because upgrading from the old " + "version does not require conversion" + ) diff --git a/src/bin/pg_upgrade/pyt/test_008_extension_control_path.py b/src/bin/pg_upgrade/pyt/test_008_extension_control_path.py new file mode 100644 index 0000000000000..a2975425d6e05 --- /dev/null +++ b/src/bin/pg_upgrade/pyt/test_008_extension_control_path.py @@ -0,0 +1,151 @@ +# Copyright (c) 2026, PostgreSQL Global Development Group + +"""Port of src/bin/pg_upgrade/t/008_extension_control_path.pl. + +Test pg_upgrade with the extension_control_path GUC active: a C extension is +installed from a custom directory layout (its .control/.sql under +``extension/`` and its .so under ``lib/``), discovered via +``extension_control_path`` and ``dynamic_library_path``. The extension must +keep working after the cluster is upgraded into a new cluster configured with +the same paths. + +This is a same-version port: ``oldinstall`` is unset, so both clusters are the +current build. ``TEST_EXT_LIB`` (the built test_ext shared module) is supplied +by the meson pytest env block; if it is genuinely unavailable the test skips +with a precise reason. 
+""" + +import os +import shutil + +import pytest + + +def _create_extension_files(ext_name, ext_dir): + """Write the extension's .control and --1.0.sql into ext_dir/extension/. + + module_pathname uses the ``$libdir/`` prefix to mimic the majority of + extensions, mirroring the Perl create_extension_files helper. + """ + control_path = os.path.join(ext_dir, "extension", ext_name + ".control") + with open(control_path, "w", encoding="utf-8") as cf: + cf.write( + "comment = 'Test C extension for pg_upgrade + extension_control_path'\n" + ) + cf.write("default_version = '1.0'\n") + cf.write("module_pathname = '$libdir/{}'\n".format(ext_name)) + cf.write("relocatable = true\n") + + sql_path = os.path.join(ext_dir, "extension", "{}--1.0.sql".format(ext_name)) + with open(sql_path, "w", encoding="utf-8") as sqlf: + sqlf.write("/* {}--1.0.sql */\n".format(ext_name)) + sqlf.write( + "-- complain if script is sourced in psql, rather than via " + "CREATE EXTENSION\n" + ) + sqlf.write( + '\\echo Use "CREATE EXTENSION {}" to load this file. 
' + "\\quit\n".format(ext_name) + ) + sqlf.write("CREATE FUNCTION test_ext()\n") + sqlf.write("RETURNS void AS 'MODULE_PATHNAME'\n") + sqlf.write("LANGUAGE C;\n") + + +def _control_path_conf(ext_path, ext_lib_path): + """Return the postgresql.conf snippet wiring up the extension's paths.""" + sep = ":" # POSIX path separator ($windows_os is false here) + return ( + "\nextension_control_path = '$system{sep}{ext}'\n" + "dynamic_library_path = '$libdir{sep}{lib}'\n".format( + sep=sep, ext=ext_path, lib=ext_lib_path + ) + ) + + +def _assert_extension_works(node, when): + """Assert SELECT test_ext() succeeds and emits its NOTICE.""" + result = node.psql_capture("SELECT test_ext()") + assert result.rc == 0, "extension works {} upgrade".format(when) + assert "NOTICE: running successful" in result.stderr, "extension working" + + +def test_008_extension_control_path(create_pg, pg_bin, tmp_path, monkeypatch): + """pg_upgrade preserves an extension installed via extension_control_path.""" + # Make sure the extension's .so path is provided by the meson env block. + ext_lib_so = os.environ.get("TEST_EXT_LIB") + if not ext_lib_so or not os.path.exists(ext_lib_so): + pytest.skip( + "TEST_EXT_LIB is not set to a built test_ext shared module " + "(needed by the extension_control_path test)" + ) + + # Create the custom extension directory layout: + # ext_dir/extension/ -- .control and .sql files + # ext_dir/lib/ -- .so file + ext_dir = str(tmp_path / "ext") + os.makedirs(os.path.join(ext_dir, "extension")) + os.makedirs(os.path.join(ext_dir, "lib")) + ext_lib = os.path.join(ext_dir, "lib") + + # Copy the .so file into the lib/ subdirectory. 
+ shutil.copy(ext_lib_so, ext_lib) + + _create_extension_files("test_ext", ext_dir) + + extension_control_path_conf = _control_path_conf(ext_dir, ext_lib) + + old = create_pg("old", start=False) + # Configure extension_control_path so the .control file is found in our + # extension/ directory, and dynamic_library_path so the .so is found in + # lib/. + old.append_conf(extension_control_path_conf) + old.start() + + # CREATE EXTENSION 'test_ext' + old.safe_psql("CREATE EXTENSION test_ext") + + # Verify the extension works before the upgrade. + _assert_extension_works(old, "before") + + old.stop() + + new = create_pg("new", start=False) + # Pre-configure the new cluster with dynamic_library_path and + # extension_control_path before running pg_upgrade. + new.append_conf(extension_control_path_conf) + + # In a VPATH build, we'll be started in the source directory, but we want to + # run pg_upgrade in the build directory so that any files generated finish + # in it, like delete_old_cluster.{sh,bat}. + monkeypatch.chdir(tmp_path) + + pg_bin.command_ok( + [ + "pg_upgrade", + "--no-sync", + "--old-datadir", + str(old.datadir), + "--new-datadir", + str(new.datadir), + "--old-bindir", + old.config_data("--bindir"), + "--new-bindir", + new.config_data("--bindir"), + "--socketdir", + str(new.host), + "--old-port", + str(old.port), + "--new-port", + str(new.port), + "--copy", + ], + "pg_upgrade succeeds with extension installed via extension_control_path", + ) + + new.start() + + # Verify the extension still works after the upgrade. 
+ _assert_extension_works(new, "after") + + new.stop() From b3d95a0d74f7b00a198bd50da6722e506a97725f Mon Sep 17 00:00:00 2001 From: Greg Burd Date: Sun, 14 Jun 2026 11:16:33 -0400 Subject: [PATCH 13/36] pytest: port the src/test/recovery TAP suite Port all recovery TAP tests (streaming/sync replication, archiving, PITR, timelines, two-phase, crash/restart, logical decoding on standby, slot limits, checkpoint and promotion scenarios) to pytest, running beside the Perl suite. Co-authored-by: Greg Burd --- src/test/recovery/Makefile | 1 + src/test/recovery/meson.build | 59 ++ src/test/recovery/pyt/test_001_stream_rep.py | 364 +++++++ src/test/recovery/pyt/test_002_archiving.py | 106 ++ .../recovery/pyt/test_003_recovery_targets.py | 226 +++++ .../recovery/pyt/test_004_timeline_switch.py | 106 ++ .../recovery/pyt/test_005_replay_delay.py | 84 ++ .../recovery/pyt/test_006_logical_decoding.py | 250 +++++ src/test/recovery/pyt/test_007_sync_rep.py | 131 +++ .../recovery/pyt/test_008_fsm_truncation.py | 67 ++ src/test/recovery/pyt/test_009_twophase.py | 287 ++++++ .../test_010_logical_decoding_timelines.py | 137 +++ .../recovery/pyt/test_012_subtransactions.py | 111 ++ .../recovery/pyt/test_013_crash_restart.py | 150 +++ .../recovery/pyt/test_014_unlogged_reinit.py | 96 ++ .../recovery/pyt/test_015_promotion_pages.py | 58 ++ .../recovery/pyt/test_016_min_consistency.py | 96 ++ src/test/recovery/pyt/test_017_shm.py | 91 ++ .../recovery/pyt/test_018_wal_optimize.py | 301 ++++++ .../recovery/pyt/test_019_replslot_limit.py | 315 ++++++ .../recovery/pyt/test_020_archive_status.py | 187 ++++ .../recovery/pyt/test_021_row_visibility.py | 112 +++ .../recovery/pyt/test_022_crash_temp_files.py | 92 ++ .../pyt/test_023_pitr_prepared_xact.py | 63 ++ .../recovery/pyt/test_024_archive_recovery.py | 81 ++ .../pyt/test_025_stuck_on_old_timeline.py | 56 ++ .../pyt/test_026_overwrite_contrecord.py | 86 ++ .../recovery/pyt/test_027_stream_regress.py | 175 ++++ 
.../recovery/pyt/test_028_pitr_timelines.py | 89 ++ .../recovery/pyt/test_029_stats_restart.py | 201 ++++ .../pyt/test_030_stats_cleanup_replica.py | 131 +++ .../pyt/test_031_recovery_conflict.py | 228 +++++ .../pyt/test_032_relfilenode_reuse.py | 122 +++ .../recovery/pyt/test_033_replay_tsp_drops.py | 102 ++ .../recovery/pyt/test_034_create_database.py | 43 + .../pyt/test_035_standby_logical_decoding.py | 922 +++++++++++++++++ .../pyt/test_036_truncated_dropped.py | 72 ++ .../recovery/pyt/test_037_invalid_database.py | 105 ++ .../test_038_save_logical_slots_shutdown.py | 77 ++ src/test/recovery/pyt/test_039_end_of_wal.py | 231 +++++ .../test_040_standby_failover_slots_sync.py | 829 +++++++++++++++ .../pyt/test_041_checkpoint_at_promote.py | 67 ++ .../recovery/pyt/test_042_low_level_backup.py | 72 ++ .../pyt/test_043_no_contrecord_switch.py | 82 ++ .../pyt/test_044_invalidate_inactive_slots.py | 59 ++ .../pyt/test_045_archive_restartpoint.py | 45 + .../pyt/test_046_checkpoint_logical_slot.py | 137 +++ .../pyt/test_047_checkpoint_physical_slot.py | 66 ++ .../pyt/test_048_vacuum_horizon_floor.py | 130 +++ .../recovery/pyt/test_049_wait_for_lsn.py | 947 ++++++++++++++++++ .../pyt/test_050_redo_segment_missing.py | 66 ++ .../pyt/test_051_effective_wal_level.py | 220 ++++ .../test_052_checkpoint_segment_missing.py | 45 + .../test_053_standby_login_event_trigger.py | 76 ++ 54 files changed, 9052 insertions(+) create mode 100644 src/test/recovery/pyt/test_001_stream_rep.py create mode 100644 src/test/recovery/pyt/test_002_archiving.py create mode 100644 src/test/recovery/pyt/test_003_recovery_targets.py create mode 100644 src/test/recovery/pyt/test_004_timeline_switch.py create mode 100644 src/test/recovery/pyt/test_005_replay_delay.py create mode 100644 src/test/recovery/pyt/test_006_logical_decoding.py create mode 100644 src/test/recovery/pyt/test_007_sync_rep.py create mode 100644 src/test/recovery/pyt/test_008_fsm_truncation.py create mode 100644 
src/test/recovery/pyt/test_009_twophase.py create mode 100644 src/test/recovery/pyt/test_010_logical_decoding_timelines.py create mode 100644 src/test/recovery/pyt/test_012_subtransactions.py create mode 100644 src/test/recovery/pyt/test_013_crash_restart.py create mode 100644 src/test/recovery/pyt/test_014_unlogged_reinit.py create mode 100644 src/test/recovery/pyt/test_015_promotion_pages.py create mode 100644 src/test/recovery/pyt/test_016_min_consistency.py create mode 100644 src/test/recovery/pyt/test_017_shm.py create mode 100644 src/test/recovery/pyt/test_018_wal_optimize.py create mode 100644 src/test/recovery/pyt/test_019_replslot_limit.py create mode 100644 src/test/recovery/pyt/test_020_archive_status.py create mode 100644 src/test/recovery/pyt/test_021_row_visibility.py create mode 100644 src/test/recovery/pyt/test_022_crash_temp_files.py create mode 100644 src/test/recovery/pyt/test_023_pitr_prepared_xact.py create mode 100644 src/test/recovery/pyt/test_024_archive_recovery.py create mode 100644 src/test/recovery/pyt/test_025_stuck_on_old_timeline.py create mode 100644 src/test/recovery/pyt/test_026_overwrite_contrecord.py create mode 100644 src/test/recovery/pyt/test_027_stream_regress.py create mode 100644 src/test/recovery/pyt/test_028_pitr_timelines.py create mode 100644 src/test/recovery/pyt/test_029_stats_restart.py create mode 100644 src/test/recovery/pyt/test_030_stats_cleanup_replica.py create mode 100644 src/test/recovery/pyt/test_031_recovery_conflict.py create mode 100644 src/test/recovery/pyt/test_032_relfilenode_reuse.py create mode 100644 src/test/recovery/pyt/test_033_replay_tsp_drops.py create mode 100644 src/test/recovery/pyt/test_034_create_database.py create mode 100644 src/test/recovery/pyt/test_035_standby_logical_decoding.py create mode 100644 src/test/recovery/pyt/test_036_truncated_dropped.py create mode 100644 src/test/recovery/pyt/test_037_invalid_database.py create mode 100644 
src/test/recovery/pyt/test_038_save_logical_slots_shutdown.py create mode 100644 src/test/recovery/pyt/test_039_end_of_wal.py create mode 100644 src/test/recovery/pyt/test_040_standby_failover_slots_sync.py create mode 100644 src/test/recovery/pyt/test_041_checkpoint_at_promote.py create mode 100644 src/test/recovery/pyt/test_042_low_level_backup.py create mode 100644 src/test/recovery/pyt/test_043_no_contrecord_switch.py create mode 100644 src/test/recovery/pyt/test_044_invalidate_inactive_slots.py create mode 100644 src/test/recovery/pyt/test_045_archive_restartpoint.py create mode 100644 src/test/recovery/pyt/test_046_checkpoint_logical_slot.py create mode 100644 src/test/recovery/pyt/test_047_checkpoint_physical_slot.py create mode 100644 src/test/recovery/pyt/test_048_vacuum_horizon_floor.py create mode 100644 src/test/recovery/pyt/test_049_wait_for_lsn.py create mode 100644 src/test/recovery/pyt/test_050_redo_segment_missing.py create mode 100644 src/test/recovery/pyt/test_051_effective_wal_level.py create mode 100644 src/test/recovery/pyt/test_052_checkpoint_segment_missing.py create mode 100644 src/test/recovery/pyt/test_053_standby_login_event_trigger.py diff --git a/src/test/recovery/Makefile b/src/test/recovery/Makefile index d41aaaf8ae13d..ed7f93845843f 100644 --- a/src/test/recovery/Makefile +++ b/src/test/recovery/Makefile @@ -26,6 +26,7 @@ export REGRESS_SHLIB check: $(prove_check) + $(pytest_check) installcheck: $(prove_installcheck) diff --git a/src/test/recovery/meson.build b/src/test/recovery/meson.build index 9eb8ed114254a..ac22b9bef2d0f 100644 --- a/src/test/recovery/meson.build +++ b/src/test/recovery/meson.build @@ -64,4 +64,63 @@ tests += { 't/053_standby_login_event_trigger.pl', ], }, + 'pytest': { + 'env': { + 'enable_injection_points': get_option('injection_points') ? 
'yes' : 'no', + }, + 'tests': [ + 'pyt/test_006_logical_decoding.py', + 'pyt/test_012_subtransactions.py', + 'pyt/test_021_row_visibility.py', + 'pyt/test_031_recovery_conflict.py', + 'pyt/test_032_relfilenode_reuse.py', + 'pyt/test_039_end_of_wal.py', + 'pyt/test_041_checkpoint_at_promote.py', + 'pyt/test_042_low_level_backup.py', + 'pyt/test_009_twophase.py', + 'pyt/test_010_logical_decoding_timelines.py', + 'pyt/test_044_invalidate_inactive_slots.py', + 'pyt/test_046_checkpoint_logical_slot.py', + 'pyt/test_047_checkpoint_physical_slot.py', + 'pyt/test_048_vacuum_horizon_floor.py', + 'pyt/test_049_wait_for_lsn.py', + 'pyt/test_050_redo_segment_missing.py', + 'pyt/test_001_stream_rep.py', + 'pyt/test_002_archiving.py', + 'pyt/test_003_recovery_targets.py', + 'pyt/test_004_timeline_switch.py', + 'pyt/test_005_replay_delay.py', + 'pyt/test_007_sync_rep.py', + 'pyt/test_008_fsm_truncation.py', + 'pyt/test_013_crash_restart.py', + 'pyt/test_014_unlogged_reinit.py', + 'pyt/test_015_promotion_pages.py', + 'pyt/test_016_min_consistency.py', + 'pyt/test_017_shm.py', + 'pyt/test_018_wal_optimize.py', + 'pyt/test_019_replslot_limit.py', + 'pyt/test_020_archive_status.py', + 'pyt/test_022_crash_temp_files.py', + 'pyt/test_023_pitr_prepared_xact.py', + 'pyt/test_024_archive_recovery.py', + 'pyt/test_025_stuck_on_old_timeline.py', + 'pyt/test_026_overwrite_contrecord.py', + 'pyt/test_027_stream_regress.py', + 'pyt/test_028_pitr_timelines.py', + 'pyt/test_029_stats_restart.py', + 'pyt/test_030_stats_cleanup_replica.py', + 'pyt/test_033_replay_tsp_drops.py', + 'pyt/test_034_create_database.py', + 'pyt/test_035_standby_logical_decoding.py', + 'pyt/test_036_truncated_dropped.py', + 'pyt/test_037_invalid_database.py', + 'pyt/test_038_save_logical_slots_shutdown.py', + 'pyt/test_040_standby_failover_slots_sync.py', + 'pyt/test_043_no_contrecord_switch.py', + 'pyt/test_045_archive_restartpoint.py', + 'pyt/test_051_effective_wal_level.py', + 
'pyt/test_052_checkpoint_segment_missing.py', + 'pyt/test_053_standby_login_event_trigger.py', + ], + }, } diff --git a/src/test/recovery/pyt/test_001_stream_rep.py b/src/test/recovery/pyt/test_001_stream_rep.py new file mode 100644 index 0000000000000..1e165b1de2e67 --- /dev/null +++ b/src/test/recovery/pyt/test_001_stream_rep.py @@ -0,0 +1,364 @@ +# Copyright (c) 2017-2026, PostgreSQL Global Development Group + +"""Port of src/test/recovery/t/001_stream_rep.pl. + +Streaming replication end to end: a primary with two cascading standbys, content +and sequence streaming, read-only enforcement on standbys, libpq +target_session_attrs routing across a multi-host connstr, SHOW/READ_REPLICATION_SLOT +over replication connections, physical-slot xmin tracking under +hot_standby_feedback, physical slot advance persisting across restart (and WAL +recycling), and BASE_BACKUP interlock plus cancellation. +""" + +import re + + +def _setup_cluster(create_pg): + primary = create_pg( + "primary", + allows_streaming=True, + auth_extra=["--create-role", "repl_role"], + start=False, + ) + primary.start() + primary.backup("my_backup") + standby1 = create_pg( + "standby_1", from_backup=(primary, "my_backup"), has_streaming=True, start=False + ) + standby1.start() + standby1.backup("my_backup") + primary.stop() + standby1.backup("my_backup_2") + primary.start() + standby2 = create_pg( + "standby_2", + from_backup=(standby1, "my_backup"), + has_streaming=True, + start=False, + ) + standby2.start() + return primary, standby1, standby2 + + +def test_001_stream_rep(create_pg): + """Streaming replication, routing, slot xmins, and BASE_BACKUP behavior.""" + primary, standby1, standby2 = _setup_cluster(create_pg) + primary.safe_psql("SELECT pg_stat_reset_shared('io')") + primary.safe_psql("CREATE TABLE tab_int AS SELECT generate_series(1,1002) AS a") + primary.safe_psql( + "CREATE TABLE user_logins(id serial, who text);\n" + "CREATE FUNCTION on_login_proc() RETURNS EVENT_TRIGGER AS 
$$\nBEGIN\n" + " IF NOT pg_is_in_recovery() THEN\n" + " INSERT INTO user_logins (who) VALUES (session_user);\n END IF;\n" + " IF session_user = 'regress_hacker' THEN\n" + " RAISE EXCEPTION 'You are not welcome!';\n END IF;\nEND;\n" + "$$ LANGUAGE plpgsql SECURITY DEFINER;\n" + "CREATE EVENT TRIGGER on_login_trigger ON login " + "EXECUTE FUNCTION on_login_proc();\n" + "ALTER EVENT TRIGGER on_login_trigger ENABLE ALWAYS;" + ) + primary.wait_for_replay_catchup(standby1) + standby1.wait_for_replay_catchup(standby2, primary) + assert standby1.safe_psql("SELECT count(*) FROM tab_int") == "1002" + assert standby2.safe_psql("SELECT count(*) FROM tab_int") == "1002" + assert ( + standby1.safe_psql( + "SELECT count(*) FROM pg_stat_recovery WHERE promote_triggered IS NOT NULL" + ) + == "1" + ), "check recovery state on standby 1" + _check_sequences(primary, standby1, standby2) + _target_session_attrs(primary, standby1, standby2) + _show_and_read_slot(primary) + _slot_xmins(primary, standby1, standby2) + _physical_slot_advance(primary, standby1, standby2) + _base_backup_interlock(primary) + + +def _check_sequences(primary, standby1, standby2): + primary.safe_psql("CREATE SEQUENCE seq1; SELECT nextval('seq1')") + primary.wait_for_replay_catchup(standby1) + standby1.wait_for_replay_catchup(standby2, primary) + assert standby1.safe_psql("SELECT * FROM seq1") == "33|0|t" + assert standby2.safe_psql("SELECT * FROM seq1") == "33|0|t" + primary.safe_psql("CREATE UNLOGGED SEQUENCE ulseq; SELECT nextval('ulseq')") + primary.wait_for_replay_catchup(standby1) + assert ( + standby1.safe_psql("SELECT pg_sequence_last_value('ulseq'::regclass) IS NULL") + == "t" + ), "pg_sequence_last_value() on unlogged sequence on standby 1" + assert ( + standby1.psql_capture("INSERT INTO tab_int VALUES (1)").rc == 3 + ), "read-only queries on standby 1" + assert ( + standby2.psql_capture("INSERT INTO tab_int VALUES (1)").rc == 3 + ), "read-only queries on standby 2" + + +def _tsa(node1, node2, target, 
mode, status): + connstr = "host={},{} port={},{} target_session_attrs={}".format( + node1.host, node2.host, node1.port, node2.port, mode + ) + res = node1.psql_capture( + "", + connstr=connstr, + extra_params=["--command", "SHOW port;"], + on_error_stop=False, + ) + if status == 0: + assert res.rc == 0 and res.stdout.strip() == str( + target.port + ), 'connect with mode "{}" and {},{} listed'.format( + mode, node1.name, node2.name + ) + else: + assert ( + res.rc == status and target is None + ), 'fail to connect with mode "{}"'.format(mode) + + +def _target_session_attrs(primary, standby1, standby2): + _tsa(primary, standby1, primary, "read-write", 0) + _tsa(standby1, primary, primary, "read-write", 0) + _tsa(primary, standby1, primary, "any", 0) + _tsa(standby1, primary, standby1, "any", 0) + _tsa(primary, standby1, primary, "primary", 0) + _tsa(standby1, primary, primary, "primary", 0) + _tsa(primary, standby1, standby1, "read-only", 0) + _tsa(standby1, primary, standby1, "read-only", 0) + _tsa(primary, primary, primary, "prefer-standby", 0) + _tsa(primary, standby1, standby1, "prefer-standby", 0) + _tsa(standby1, primary, standby1, "prefer-standby", 0) + _tsa(primary, standby1, standby1, "standby", 0) + _tsa(standby1, primary, standby1, "standby", 0) + _tsa(standby1, standby2, None, "read-write", 2) + _tsa(standby1, standby2, None, "primary", 2) + _tsa(primary, primary, None, "read-only", 2) + _tsa(primary, primary, None, "standby", 2) + + +def _show_and_read_slot(primary): + primary.psql_capture( + "CREATE ROLE repl_role REPLICATION LOGIN;\n" + "GRANT pg_read_all_settings TO repl_role;" + ) + common = "host={} port={} user=repl_role".format(primary.host, primary.port) + rep = common + " replication=1" + db = common + " replication=database dbname=postgres" + for connstr, label in ((rep, "physical"), (db, "logical")): + for sql in ("SHOW ALL;", "SHOW work_mem;", "SHOW primary_conninfo;"): + assert ( + primary.psql_capture(sql, connstr=connstr).rc == 0 + ), "{} 
over {} replication".format(sql, label) + slotname = "test_read_replication_slot_physical" + res = primary.psql_capture( + "READ_REPLICATION_SLOT non_existent_slot;", connstr=rep, on_error_stop=False + ) + assert res.rc == 0, "READ_REPLICATION_SLOT exit code 0 on success" + assert re.search( + r"^\|\|$", res.stdout.strip(), re.M + ), "READ_REPLICATION_SLOT returns NULL values if slot does not exist" + primary.psql_capture( + "CREATE_REPLICATION_SLOT {} PHYSICAL RESERVE_WAL;".format(slotname), connstr=rep + ) + res = primary.psql_capture( + "READ_REPLICATION_SLOT {};".format(slotname), connstr=rep + ) + assert res.rc == 0, "READ_REPLICATION_SLOT success with existing slot" + assert re.search( + r"^physical\|[^|]*\|1$", res.stdout.strip(), re.M + ), "READ_REPLICATION_SLOT returns tuple with slot information" + primary.psql_capture("DROP_REPLICATION_SLOT {};".format(slotname), connstr=rep) + + +def _get_slot_xmins(node, slotname, check_expr): + assert node.poll_query_until( + "SELECT {}\nFROM pg_catalog.pg_replication_slots\n" + "WHERE slot_name = '{}';".format(check_expr, slotname) + ), "Timed out waiting for slot xmins to advance" + info = node.slot(slotname) + return info["xmin"], info["catalog_xmin"] + + +def _slot_xmins(primary, standby1, standby2): + assert primary.poll_query_until( + "SELECT sum(reads) > 0 FROM pg_catalog.pg_stat_io\n" + "WHERE backend_type = 'walsender' AND object = 'wal'" + ), "Timed out waiting for the walsender to update its IO statistics" + primary.append_conf("max_replication_slots = 4") + primary.restart() + assert ( + primary.psql_capture( + "SELECT pg_create_physical_replication_slot('standby_1');" + ).rc + == 0 + ), "physical slot created on primary" + standby1.append_conf("primary_slot_name = standby_1") + standby1.append_conf("wal_receiver_status_interval = 1") + standby1.append_conf("max_replication_slots = 4") + standby1.restart() + assert ( + standby1.psql_capture( + "SELECT pg_create_physical_replication_slot('standby_2');" + 
).rc + == 0 + ), "physical slot created on intermediate replica" + standby2.append_conf("primary_slot_name = standby_2") + standby2.append_conf("wal_receiver_status_interval = 1") + standby2.reload() + xmin, cat = _get_slot_xmins( + primary, "standby_1", "xmin IS NULL AND catalog_xmin IS NULL" + ) + assert xmin == "" and cat == "", "non-cascaded slot null with no hs_feedback" + xmin, cat = _get_slot_xmins( + standby1, "standby_2", "xmin IS NULL AND catalog_xmin IS NULL" + ) + assert xmin == "" and cat == "", "cascaded slot null with no hs_feedback" + primary.safe_psql("CREATE TABLE replayed(val integer);") + _hs_feedback_xmin(primary, standby1, standby2) + + +def _replay_check(primary, standby1, standby2): + newval = primary.safe_psql( + "INSERT INTO replayed(val) SELECT coalesce(max(val),0) + 1 AS newval " + "FROM replayed RETURNING val" + ) + primary.wait_for_replay_catchup(standby1) + standby1.wait_for_replay_catchup(standby2, primary) + assert standby1.safe_psql("SELECT 1 FROM replayed WHERE val = {}".format(newval)) + assert standby2.safe_psql("SELECT 1 FROM replayed WHERE val = {}".format(newval)) + return newval + + +def _hs_feedback_xmin(primary, standby1, standby2): + _replay_check(primary, standby1, standby2) + for node in (standby1, standby2): + assert ( + node.safe_psql( + "SELECT evtname FROM pg_event_trigger WHERE evtevent = 'login'" + ) + == "on_login_trigger" + ), "Name of login trigger" + for node in (standby1, standby2): + node.safe_psql("ALTER SYSTEM SET hot_standby_feedback = on;") + node.reload() + _replay_check(primary, standby1, standby2) + xmin, cat = _get_slot_xmins( + primary, "standby_1", "xmin IS NOT NULL AND catalog_xmin IS NULL" + ) + assert xmin != "" and cat == "", "non-cascaded slot non-null with hs feedback" + xmin1, cat1 = _get_slot_xmins( + standby1, "standby_2", "xmin IS NOT NULL AND catalog_xmin IS NULL" + ) + assert xmin1 != "" and cat1 == "", "cascaded slot non-null with hs feedback" + primary.safe_psql( + "do $$\nbegin\n for 
i in 10000..11000 loop\n begin\n" + " insert into tab_int values (i);\n exception\n" + " when division_by_zero then null;\n end;\n end loop;\nend$$;" + ) + primary.safe_psql("VACUUM;") + primary.safe_psql("CHECKPOINT;") + xmin2, cat2 = _get_slot_xmins(primary, "standby_1", "xmin <> '{}'".format(xmin)) + assert xmin2 != xmin and cat2 == "", "non-cascaded slot xmin changed" + xmin2, cat2 = _get_slot_xmins(standby1, "standby_2", "xmin <> '{}'".format(xmin1)) + assert xmin2 != xmin1 and cat2 == "", "cascaded slot xmin changed" + for node in (standby1, standby2): + node.safe_psql("ALTER SYSTEM SET hot_standby_feedback = off;") + node.reload() + _replay_check(primary, standby1, standby2) + xmin, cat = _get_slot_xmins( + primary, "standby_1", "xmin IS NULL AND catalog_xmin IS NULL" + ) + assert xmin == "" and cat == "", "non-cascaded slot null with hs feedback reset" + xmin, cat = _get_slot_xmins( + standby1, "standby_2", "xmin IS NULL AND catalog_xmin IS NULL" + ) + assert xmin == "" and cat == "", "cascaded slot null with hs feedback reset" + + +def _physical_slot_advance(primary, standby1, standby2): + standby2.append_conf("primary_slot_name = ''") + standby2.enable_streaming(primary) + standby2.reload() + assert ( + standby1.safe_psql( + "SELECT sum(writes) > 0 FROM pg_stat_io WHERE backend_type = " + "'walreceiver' AND object = 'wal'" + ) + == "t" + ), "WAL receiver generates statistics for WAL writes" + standby1.stop() + newval = primary.safe_psql( + "INSERT INTO replayed(val) SELECT coalesce(max(val),0) + 1 AS newval " + "FROM replayed RETURNING val" + ) + primary.wait_for_catchup(standby2) + assert ( + standby2.safe_psql("SELECT 1 FROM replayed WHERE val = {}".format(newval)) + == "1" + ), "standby_2 replayed primary value {}".format(newval) + primary.safe_psql( + "SELECT pg_drop_replication_slot(slot_name) FROM pg_replication_slots;" + ) + phys_slot = "phys_slot" + primary.safe_psql( + "SELECT pg_create_physical_replication_slot('{}', true);".format(phys_slot) + 
) + segment_removed = primary.safe_psql("SELECT pg_walfile_name(pg_current_wal_lsn())") + primary.advance_wal(1) + current_lsn = primary.safe_psql("SELECT pg_current_wal_lsn();") + assert ( + primary.psql_capture( + "SELECT pg_replication_slot_advance('{}', '{}'::pg_lsn);".format( + phys_slot, current_lsn + ) + ).rc + == 0 + ), "slot advancing with physical slot" + pre = primary.safe_psql( + "SELECT restart_lsn from pg_replication_slots WHERE slot_name = " + "'{}';".format(phys_slot) + ) + primary.restart() + post = primary.safe_psql( + "SELECT restart_lsn from pg_replication_slots WHERE slot_name = " + "'{}';".format(phys_slot) + ) + assert pre == post, "physical slot advance persists across restarts" + assert not ( + primary.datadir / "pg_wal" / segment_removed + ).is_file(), "WAL segment {} recycled after physical slot advancing".format( + segment_removed + ) + + +def _base_backup_interlock(primary): + connstr = primary.connstr("postgres") + " replication=database" + primary.command_fails_like( + [ + "psql", + "--no-psqlrc", + "--command", + "SELECT pg_backup_start('backup', true)", + "--command", + "BASE_BACKUP", + "--dbname", + connstr, + ], + r"a backup is already in progress in this session", + "BASE_BACKUP cannot run in session already running backup", + ) + sess = primary.background_psql( + "postgres", on_error_stop=False, replication="database" + ) + sess.send( + "BASE_BACKUP (CHECKPOINT 'fast', MAX_RATE 32);\nSELECT pg_backup_stop();\n" + ) + assert primary.poll_query_until( + "SELECT pg_cancel_backend(a.pid) FROM pg_stat_activity a, " + "pg_stat_progress_basebackup b WHERE a.pid = b.pid AND " + "a.query ~ 'BASE_BACKUP' AND b.phase = 'streaming database files';", + "t", + ), "WAL sender sending base backup killed" + sess.wait_for_stderr(r"backup is not in progress") + sess.quit() diff --git a/src/test/recovery/pyt/test_002_archiving.py b/src/test/recovery/pyt/test_002_archiving.py new file mode 100644 index 0000000000000..8775d2397e69d --- /dev/null +++ 
b/src/test/recovery/pyt/test_002_archiving.py @@ -0,0 +1,106 @@ +# Copyright (c) 2021-2026, PostgreSQL Global Development Group + +"""Port of src/test/recovery/t/002_archiving.pl. + +WAL archiving with a hot standby, archive_cleanup_command/recovery_end_command, +and removal of recovery temp files and signal files at end of recovery. +""" + +import re + +import pypg + + +def test_archiving(create_pg): + """A standby restores from archives; recovery commands and temp files behave.""" + primary = create_pg("primary", has_archiving=True, allows_streaming=True) + backup_name = "my_backup" + primary.backup(backup_name) + + # Standby restoring from the primary's archives (not streaming). + standby = create_pg( + "standby", from_backup=(primary, backup_name), has_restoring=True, start=False + ) + standby.append_conf("wal_retrieve_retry_interval = '100ms'") + + data_dir = standby.datadir + cleanup_file = "archive_cleanup_command.done" + recovery_end_file = "recovery_end_command.done" + standby.append_conf( + "archive_cleanup_command = 'echo archive_cleanup_done > {}'\n" + "recovery_end_command = 'echo recovery_ended_done > {}'".format( + cleanup_file, recovery_end_file + ) + ) + standby.start() + + primary.safe_psql("CREATE TABLE tab_int AS SELECT generate_series(1,1000) AS a") + primary.safe_psql("CHECKPOINT") + current_lsn = primary.safe_psql("SELECT pg_current_wal_lsn();") + primary.safe_psql("SELECT pg_switch_wal()") + primary.safe_psql("INSERT INTO tab_int VALUES (generate_series(1001,2000))") + + assert standby.poll_query_until( + "SELECT '{}'::pg_lsn <= pg_last_wal_replay_lsn()".format(current_lsn) + ), "standby to catch up" + assert ( + standby.safe_psql("SELECT count(*) FROM tab_int") == "1000" + ), "check content from archives" + + # archive_cleanup_command runs after a restartpoint (checkpoint). 
+ standby.safe_psql("CHECKPOINT") + assert (data_dir / cleanup_file).is_file(), "archive_cleanup_command executed" + assert not (data_dir / recovery_end_file).is_file(), "recovery_end_command not yet" + + # Promote, forcing a timeline switch and archiving of the history file. + standby.promote() + primary_archive = primary.archive_dir + assert primary.poll_query_until( + "SELECT size IS NOT NULL FROM " + "pg_stat_file('{}/00000002.history', true)".format(primary_archive) + ), "archiving of 00000002.history" + + assert ( + data_dir / recovery_end_file + ).is_file(), "recovery_end_command after promote" + + standby2 = create_pg( + "standby2", from_backup=(primary, backup_name), has_restoring=True, start=False + ) + # Make recovery_end_command fail; promotion should be unaffected. + standby2.append_conf( + "recovery_end_command = 'echo recovery_end_failed > missing_dir/xyz.file'" + ) + + # With both recovery.signal and standby.signal present, standby.signal wins + # and both are removed at the end of recovery. 
+ standby2.set_recovery_mode() + standby2_data = standby2.datadir + assert (standby2_data / "recovery.signal").is_file(), "recovery.signal present" + assert (standby2_data / "standby.signal").is_file(), "standby.signal present" + + standby2.start() + log_location = standby2.current_log_position() + + standby2.promote() + + log_contents = pypg.slurp_file(standby2.log, log_location) + assert re.search( + r'(?s)restored log file "00000002.history" from archive', log_contents + ), "00000002.history retrieved from the archives" + assert not ( + standby2_data / "pg_wal" / "RECOVERYHISTORY" + ).is_file(), "RECOVERYHISTORY removed after promotion" + assert not ( + standby2_data / "pg_wal" / "RECOVERYXLOG" + ).is_file(), "RECOVERYXLOG removed after promotion" + assert re.search( + r"(?s)WARNING:.*recovery_end_command", log_contents + ), "recovery_end_command failure detected in logs after promotion" + + assert not ( + standby2_data / "recovery.signal" + ).is_file(), "recovery.signal was left behind after promotion" + assert not ( + standby2_data / "standby.signal" + ).is_file(), "standby.signal was left behind after promotion" diff --git a/src/test/recovery/pyt/test_003_recovery_targets.py b/src/test/recovery/pyt/test_003_recovery_targets.py new file mode 100644 index 0000000000000..1b485fd87bf6b --- /dev/null +++ b/src/test/recovery/pyt/test_003_recovery_targets.py @@ -0,0 +1,226 @@ +# Copyright (c) 2021-2026, PostgreSQL Global Development Group + +"""Port of src/test/recovery/t/003_recovery_targets.pl. + +Tests for recovery targets: name, timestamp, XID, LSN, immediate, and invalid +configurations. 
+""" + +import os +import re +import time + +import pypg + + +def _test_recovery_standby( + pg_bin, + create_pg, + test_name, + node_name, + primary, + recovery_params, + num_rows, + until_lsn, +): + """Create a restoring standby with a recovery target and check its content.""" + standby = create_pg( + node_name, from_backup=(primary, "my_backup"), has_restoring=True, start=False + ) + for param in recovery_params: + standby.append_conf(param) + standby.start() + + assert standby.poll_query_until( + "SELECT '{}'::pg_lsn <= pg_last_wal_replay_lsn()".format(until_lsn) + ), "standby to catch up" + + result = standby.safe_psql("SELECT count(*) FROM tab_int") + assert result == num_rows, "check standby content for {}".format(test_name) + + standby.stop() + + +def _make_primary_data(primary): + """Create the WAL history with named restore points and return the markers.""" + primary.safe_psql("CREATE TABLE tab_int AS SELECT generate_series(1,1000) AS a") + lsn1 = primary.safe_psql("SELECT pg_current_wal_lsn();") + primary.backup("my_backup") + + primary.safe_psql("INSERT INTO tab_int VALUES (generate_series(1001,2000))") + lsn2, recovery_txid = primary.safe_psql( + "SELECT pg_current_wal_lsn(), pg_current_xact_id();" + ).split("|") + + primary.safe_psql("INSERT INTO tab_int VALUES (generate_series(2001,3000))") + lsn3 = primary.safe_psql("SELECT pg_current_wal_lsn();") + recovery_time = primary.safe_psql("SELECT now()") + + primary.safe_psql("INSERT INTO tab_int VALUES (generate_series(3001,4000))") + recovery_name = "my_target" + lsn4 = primary.safe_psql("SELECT pg_current_wal_lsn();") + primary.safe_psql("SELECT pg_create_restore_point('{}');".format(recovery_name)) + + primary.safe_psql("INSERT INTO tab_int VALUES (generate_series(4001,5000))") + recovery_lsn = primary.safe_psql("SELECT pg_current_wal_lsn()") + + primary.safe_psql("INSERT INTO tab_int VALUES (generate_series(5001,6000))") + primary.safe_psql("SELECT pg_switch_wal()") + + return { + "lsn1": lsn1, + 
"lsn2": lsn2, + "recovery_txid": recovery_txid, + "lsn3": lsn3, + "recovery_time": recovery_time, + "recovery_name": recovery_name, + "lsn4": lsn4, + "recovery_lsn": recovery_lsn, + } + + +def test_recovery_targets(pg_bin, create_pg): + """Recovery to each kind of target, and invalid-target handling.""" + primary = create_pg( + "primary", has_archiving=True, allows_streaming=True, start=False + ) + # Bump the transaction ID epoch to stress recovery_target_xid parsing. + pg_bin.command_ok(["pg_resetwal", "--epoch", "1", primary.datadir]) + primary.start() + + m = _make_primary_data(primary) + + _test_recovery_standby( + pg_bin, + create_pg, + "immediate target", + "standby_1", + primary, + ["recovery_target = 'immediate'"], + "1000", + m["lsn1"], + ) + _test_recovery_standby( + pg_bin, + create_pg, + "XID", + "standby_2", + primary, + ["recovery_target_xid = '{}'".format(m["recovery_txid"])], + "2000", + m["lsn2"], + ) + _test_recovery_standby( + pg_bin, + create_pg, + "time", + "standby_3", + primary, + ["recovery_target_time = '{}'".format(m["recovery_time"])], + "3000", + m["lsn3"], + ) + _test_recovery_standby( + pg_bin, + create_pg, + "name", + "standby_4", + primary, + ["recovery_target_name = '{}'".format(m["recovery_name"])], + "4000", + m["lsn4"], + ) + _test_recovery_standby( + pg_bin, + create_pg, + "LSN", + "standby_5", + primary, + ["recovery_target_lsn = '{}'".format(m["recovery_lsn"])], + "5000", + m["recovery_lsn"], + ) + + # Multiple overriding settings are allowed (last one wins). 
+ _test_recovery_standby( + pg_bin, + create_pg, + "multiple overriding settings", + "standby_6", + primary, + [ + "recovery_target_name = '{}'".format(m["recovery_name"]), + "recovery_target_name = ''", + "recovery_target_time = '{}'".format(m["recovery_time"]), + ], + "3000", + m["lsn3"], + ) + + _test_conflicting_targets(pg_bin, create_pg, primary, m) + _test_recovery_ends_early(pg_bin, create_pg, primary) + _test_invalid_target_gucs(primary) + + +def _test_conflicting_targets(pg_bin, create_pg, primary, m): + standby = create_pg( + "standby_7", from_backup=(primary, "my_backup"), has_restoring=True, start=False + ) + standby.append_conf( + "recovery_target_name = '{}'\nrecovery_target_time = '{}'".format( + m["recovery_name"], m["recovery_time"] + ) + ) + result = pg_bin.result( + ["pg_ctl", "--pgdata", standby.datadir, "--log", standby.log, "start"] + ) + assert result.rc != 0, "invalid recovery startup fails" + assert re.search( + r"multiple recovery targets specified", pypg.slurp_file(standby.log) + ), "multiple conflicting settings" + + +def _test_recovery_ends_early(pg_bin, create_pg, primary): + standby = create_pg( + "standby_8", + from_backup=(primary, "my_backup"), + has_restoring=True, + standby=False, + start=False, + ) + standby.append_conf("recovery_target_name = 'does_not_exist'") + pg_bin.result( + ["pg_ctl", "--pgdata", standby.datadir, "--log", standby.log, "start"] + ) + + # Wait for postgres to terminate. 
+ pidfile = standby.datadir / "postmaster.pid" + for _ in range(10 * int(os.environ.get("PG_TEST_TIMEOUT_DEFAULT", "180"))): + if not pidfile.is_file(): + break + time.sleep(0.1) + + assert re.search( + r"FATAL: .* recovery ended before configured recovery target was reached", + pypg.slurp_file(standby.log), + ), "recovery end before target reached is a fatal error" + + +def _test_invalid_target_gucs(primary): + cases = [ + ("recovery_target_timeline", "bogus", r"is not a valid number"), + ("recovery_target_timeline", "0", r"must be between 1 and 4294967295"), + ("recovery_target_timeline", "4294967296", r"must be between 1 and 4294967295"), + ("recovery_target_xid", "bogus", r"is not a valid number"), + ("recovery_target_xid", "-1", r"is not a valid number"), + ( + "recovery_target_xid", + "0", + r"without epoch must be greater than or equal to 3", + ), + ] + for guc, value, pattern in cases: + result = primary.bin.result( + ["psql", "-c", "ALTER SYSTEM SET {} TO '{}'".format(guc, value)] + ) + assert re.search(pattern, result.stderr), "invalid {} ({})".format(guc, value) diff --git a/src/test/recovery/pyt/test_004_timeline_switch.py b/src/test/recovery/pyt/test_004_timeline_switch.py new file mode 100644 index 0000000000000..468169bee6ecf --- /dev/null +++ b/src/test/recovery/pyt/test_004_timeline_switch.py @@ -0,0 +1,106 @@ +# Copyright (c) 2021-2026, PostgreSQL Global Development Group + +"""Port of src/test/recovery/t/004_timeline_switch.pl. + +A cascading standby must be able to follow a newly-promoted standby on a new +timeline. 
+""" + + +def test_timeline_switch(create_pg): + """Cascading standby follows a promoted standby across a timeline switch.""" + primary = create_pg("primary", allows_streaming=True) + + backup_name = "my_backup" + primary.backup(backup_name) + + standby_1 = create_pg( + "standby_1", from_backup=(primary, backup_name), has_streaming=True + ) + standby_2 = create_pg( + "standby_2", from_backup=(primary, backup_name), has_streaming=True + ) + + primary.safe_psql("CREATE TABLE tab_int AS SELECT generate_series(1,1000) AS a") + + # A clean stop ensures both standbys received and flushed all records. + primary.stop() + + # Promote standby 1, switching it to a new timeline. + assert ( + standby_1.safe_psql("SELECT pg_promote(wait_seconds => 300)") == "t" + ), "promotion of standby with pg_promote" + + # Switch standby 2 to replay from standby 1. The WAL receiver should stay + # alive across the switch and the new conninfo must not leak. + secret = "dont_show_me" + connstr_1 = standby_1.connstr() + standby_2.append_conf("primary_conninfo='{} password={}'".format(connstr_1, secret)) + + # Rotate the logfile before restarting, for the log checks below. + standby_2.rotate_logfile() + standby_2.restart() + + # Wait for the walreceiver to reconnect after the restart. + assert standby_2.poll_query_until( + "SELECT EXISTS(SELECT 1 FROM pg_stat_wal_receiver)" + ) + wr_pid_before_switch = standby_2.safe_psql("SELECT pid FROM pg_stat_wal_receiver") + + standby_1.safe_psql("INSERT INTO tab_int VALUES (generate_series(1001,2000))") + standby_1.wait_for_catchup(standby_2) + + assert ( + standby_2.safe_psql("SELECT count(*) FROM tab_int") == "2000" + ), "check content of standby 2" + + # The WAL receiver should not have been stopped while switching timelines. 
+ assert not standby_2.log_matches( + "FATAL: .* terminating walreceiver process due to administrator command" + ), "WAL receiver should not be stopped across timeline jumps" + + wr_pid_after_switch = standby_2.safe_psql("SELECT pid FROM pg_stat_wal_receiver") + assert ( + wr_pid_before_switch == wr_pid_after_switch + ), "WAL receiver PID matches across timeline jumps" + + raw_conninfo_count = standby_2.safe_psql( + "SELECT count(*) FROM pg_stat_wal_receiver " + "WHERE conninfo LIKE '%{}%'".format(secret) + ) + assert ( + raw_conninfo_count == "0" + ), "pg_stat_wal_receiver.conninfo not updated across timeline jumps" + + _test_archiving_timeline(create_pg, backup_name) + + +def _test_archiving_timeline(create_pg, backup_name): + """A standby follows a primary on a newer timeline with WAL archiving on.""" + primary_2 = create_pg( + "primary_2", allows_streaming=True, has_archiving=True, start=False + ) + primary_2.append_conf("wal_keep_size = 512MB") + primary_2.start() + + primary_2.backup(backup_name) + + standby_3 = create_pg( + "standby_3", + from_backup=(primary_2, backup_name), + has_streaming=True, + start=False, + ) + + # Restart the primary in standby mode and promote it, onto a new timeline. + primary_2.set_standby_mode() + primary_2.restart() + primary_2.promote() + + standby_3.start() + primary_2.safe_psql("CREATE TABLE tab_int AS SELECT 1 AS a") + primary_2.wait_for_catchup(standby_3) + + assert ( + standby_3.safe_psql("SELECT count(*) FROM tab_int") == "1" + ), "check content of standby 3" diff --git a/src/test/recovery/pyt/test_005_replay_delay.py b/src/test/recovery/pyt/test_005_replay_delay.py new file mode 100644 index 0000000000000..4d5de1a97664d --- /dev/null +++ b/src/test/recovery/pyt/test_005_replay_delay.py @@ -0,0 +1,84 @@ +# Copyright (c) 2021-2026, PostgreSQL Global Development Group + +"""Port of src/test/recovery/t/005_replay_delay.pl. + +Checks recovery_min_apply_delay and recovery pause. 
+""" + +import time + + +def test_replay_delay(create_pg): + """A standby honors recovery_min_apply_delay; recovery can pause/resume.""" + primary = create_pg("primary", allows_streaming=True) + primary.safe_psql("CREATE TABLE tab_int AS SELECT generate_series(1, 10) AS a") + + backup_name = "my_backup" + primary.backup(backup_name) + + delay = 3 + standby = create_pg( + "standby", from_backup=(primary, backup_name), has_streaming=True, start=False + ) + standby.append_conf("recovery_min_apply_delay = '{}s'".format(delay)) + standby.start() + + # Record a base timestamp just before the insertion so the delay comparison + # is predictable even on slow machines. + primary_insert_time = time.time() + primary.safe_psql("INSERT INTO tab_int VALUES (generate_series(11, 20))") + + until_lsn = primary.safe_psql("SELECT pg_current_wal_lsn()") + assert standby.poll_query_until( + "SELECT (pg_last_wal_replay_lsn() - '{}'::pg_lsn) >= 0".format(until_lsn) + ), "standby never caught up" + + assert ( + time.time() - primary_insert_time >= delay + ), "standby applies WAL only after replication delay" + + # Check that recovery can be paused or resumed as expected. + standby2 = create_pg( + "standby2", from_backup=(primary, backup_name), has_streaming=True + ) + + assert ( + standby2.safe_psql("SELECT pg_get_wal_replay_pause_state()") == "not paused" + ), "pg_get_wal_replay_pause_state() reports not paused" + + standby2.safe_psql("SELECT pg_wal_replay_pause()") + primary.safe_psql("INSERT INTO tab_int VALUES (generate_series(21,30))") + assert standby2.poll_query_until( + "SELECT pg_get_wal_replay_pause_state() = 'paused'" + ), "recovery to be paused" + + # Even if new WAL records stream from the primary, the paused state doesn't + # replay them. 
+ receive_lsn = standby2.safe_psql("SELECT pg_last_wal_receive_lsn()") + replay_lsn = standby2.safe_psql("SELECT pg_last_wal_replay_lsn()") + primary.safe_psql("INSERT INTO tab_int VALUES (generate_series(31,40))") + assert standby2.poll_query_until( + "SELECT '{}'::pg_lsn < pg_last_wal_receive_lsn()".format(receive_lsn) + ), "new WAL to be streamed" + assert ( + standby2.safe_psql("SELECT pg_last_wal_replay_lsn()") == replay_lsn + ), "no WAL is replayed in the paused state" + + # Resume recovery and wait until it's actually resumed. + standby2.safe_psql("SELECT pg_wal_replay_resume()") + assert standby2.poll_query_until( + "SELECT pg_get_wal_replay_pause_state() = 'not paused' " + "AND pg_last_wal_replay_lsn() > '{}'::pg_lsn".format(replay_lsn) + ), "recovery to be resumed" + + # A promotion while paused should end the paused state and continue. + standby2.safe_psql("SELECT pg_wal_replay_pause()") + primary.safe_psql("INSERT INTO tab_int VALUES (generate_series(41,50))") + assert standby2.poll_query_until( + "SELECT pg_get_wal_replay_pause_state() = 'paused'" + ), "recovery to be paused" + + standby2.promote() + assert standby2.poll_query_until( + "SELECT NOT pg_is_in_recovery()" + ), "promotion to finish" diff --git a/src/test/recovery/pyt/test_006_logical_decoding.py b/src/test/recovery/pyt/test_006_logical_decoding.py new file mode 100644 index 0000000000000..3ca2e3f25614f --- /dev/null +++ b/src/test/recovery/pyt/test_006_logical_decoding.py @@ -0,0 +1,250 @@ +# Copyright (c) 2017-2026, PostgreSQL Global Development Group + +"""Port of src/test/recovery/t/006_logical_decoding.pl. 
+ +Core logical decoding behaviour on a single node: replication-command error +paths, SQL and pg_recvlogical decoding producing identical output, decoding +state surviving a fast restart, cross-database slot-use failures, a database +with an active logical slot refusing to drop (and succeeding once inactive), +logical slot advance persisting across restarts, and pg_stat_replication_slots +statistics/reset semantics. +""" + +import platform +import subprocess + +import pypg + +_EXPECTED = ( + "BEGIN\n" + "table public.decoding_test: INSERT: x[integer]:1 y[text]:'1'\n" + "table public.decoding_test: INSERT: x[integer]:2 y[text]:'2'\n" + "table public.decoding_test: INSERT: x[integer]:3 y[text]:'3'\n" + "table public.decoding_test: INSERT: x[integer]:4 y[text]:'4'\n" + "COMMIT" +) + + +def _expect_stderr(node, query, pattern, msg, replication=None): + res = node.psql_capture(query, dbname="template1", replication=replication) + assert pattern in res.stderr, "{}: {!r} not in {!r}".format( + msg, pattern, res.stderr + ) + + +def test_006_logical_decoding(create_pg): + """Logical decoding error paths, output, restart, drop, advance and stats.""" + node = create_pg("primary", allows_streaming=True, start=False) + node.append_conf("\nwal_level = logical\n") + node.start() + node.safe_psql("CREATE TABLE decoding_test(x integer, y text);") + node.safe_psql( + "SELECT pg_create_logical_replication_slot('test_slot', 'test_decoding');" + ) + _expect_stderr( + node, + "START_REPLICATION SLOT test_slot LOGICAL 0/0", + 'replication slot "test_slot" was not created in this database', + "Logical decoding correctly fails to start", + replication="database", + ) + _expect_stderr( + node, + "READ_REPLICATION_SLOT test_slot;", + "cannot use READ_REPLICATION_SLOT with a logical replication slot", + "READ_REPLICATION_SLOT not supported for logical slots", + replication="database", + ) + _expect_stderr( + node, + "START_REPLICATION SLOT s1 LOGICAL 0/1", + "ERROR: logical decoding requires 
a database connection", + "Logical decoding fails on non-database connection", + replication="true", + ) + node.safe_psql( + "INSERT INTO decoding_test(x,y) SELECT s, s::text " + "FROM generate_series(1,10) s;" + ) + result = node.safe_psql( + "SELECT pg_logical_slot_get_changes('test_slot', NULL, NULL);" + ) + assert len(result.split("\n")) == 12, "Decoding produced 12 rows inc BEGIN/COMMIT" + node.restart() + result = node.safe_psql( + "SELECT pg_logical_slot_get_changes('test_slot', NULL, NULL);" + ) + assert result == "", "Decoding after fast restart repeats no rows" + node.safe_psql( + "INSERT INTO decoding_test(x,y) SELECT s, s::text " + "FROM generate_series(1,4) s;" + ) + stdout_sql = node.safe_psql( + "SELECT data FROM pg_logical_slot_peek_changes('test_slot', NULL, NULL, " + "'include-xids', '0', 'skip-empty-xacts', '1');" + ) + assert stdout_sql == _EXPECTED, "got expected output from SQL decoding session" + endpos = node.safe_psql( + "SELECT lsn FROM pg_logical_slot_peek_changes('test_slot', NULL, NULL) " + "ORDER BY lsn DESC LIMIT 1;" + ) + node.safe_psql( + "INSERT INTO decoding_test(x,y) SELECT s, s::text " + "FROM generate_series(5,50) s;" + ) + opts = {"include-xids": "0", "skip-empty-xacts": "1"} + stdout_recv = node.pg_recvlogical_upto( + "postgres", "test_slot", endpos, pypg.test_timeout_default(), options=opts + ) + assert ( + stdout_recv.rstrip("\n") == _EXPECTED + ), "got same expected output from pg_recvlogical decoding session" + assert node.poll_query_until( + "SELECT EXISTS (SELECT 1 FROM pg_replication_slots " + "WHERE slot_name = 'test_slot' AND active_pid IS NULL)" + ), "slot never became inactive" + stdout_recv = node.pg_recvlogical_upto( + "postgres", "test_slot", endpos, pypg.test_timeout_default(), options=opts + ) + assert stdout_recv.rstrip("\n") == "", "pg_recvlogical acknowledged changes" + _otherdb_phase(node) + _advance_and_stats_phase(node) + node.stop() + + +def _otherdb_phase(node): + """A DB with an active logical slot 
refuses to drop; succeeds once inactive.""" + node.safe_psql("CREATE DATABASE otherdb") + assert ( + node.psql_capture( + "SELECT lsn FROM pg_logical_slot_peek_changes('test_slot', NULL, NULL) " + "ORDER BY lsn DESC LIMIT 1;", + dbname="otherdb", + ).rc + == 3 + ), "replaying logical slot from another database fails" + node.safe_psql( + "SELECT pg_create_logical_replication_slot('otherdb_slot', 'test_decoding');", + dbname="otherdb", + ) + if platform.system() != "Windows": + recv = subprocess.Popen( # pylint: disable=consider-using-with + [ + "pg_recvlogical", + "--dbname", + node.connstr("otherdb"), + "--slot", + "otherdb_slot", + "--file", + "-", + "--start", + ], + stdout=subprocess.DEVNULL, + stderr=subprocess.DEVNULL, + ) + try: + assert node.poll_query_until( + "SELECT EXISTS (SELECT 1 FROM pg_replication_slots " + "WHERE slot_name = 'otherdb_slot' AND active_pid IS NOT NULL)", + dbname="otherdb", + ), "slot never became active" + assert ( + node.psql_capture("DROP DATABASE otherdb").rc == 3 + ), "dropping a DB with active logical slots fails" + finally: + recv.terminate() + recv.wait() + assert ( + node.slot("otherdb_slot")["plugin"] == "test_decoding" + ), "logical slot still exists" + assert node.poll_query_until( + "SELECT EXISTS (SELECT 1 FROM pg_replication_slots " + "WHERE slot_name = 'otherdb_slot' AND active_pid IS NULL)", + dbname="otherdb", + ), "slot never became inactive" + assert ( + node.psql_capture("DROP DATABASE otherdb").rc == 0 + ), "dropping a DB with inactive logical slots succeeds" + assert ( + node.slot("otherdb_slot")["plugin"] == "" + ), "logical slot was actually dropped with DB" + + +def _advance_and_stats_phase(node): + """Logical slot advance persists across restart; stats/reset semantics.""" + logical_slot = "logical_slot" + node.safe_psql( + "SELECT pg_create_logical_replication_slot('{}', 'test_decoding', " + "false, false, true);".format(logical_slot) + ) + node.safe_psql( + "CREATE TABLE tab_logical_slot (a int);\n" + 
"INSERT INTO tab_logical_slot VALUES (generate_series(1,10));" + ) + current_lsn = node.safe_psql("SELECT pg_current_wal_lsn();") + assert ( + node.psql_capture( + "SELECT pg_replication_slot_advance('{}', '{}'::pg_lsn);".format( + logical_slot, current_lsn + ) + ).rc + == 0 + ), "slot advancing with logical slot" + pre = node.safe_psql( + "SELECT restart_lsn from pg_replication_slots " + "WHERE slot_name = '{}';".format(logical_slot) + ) + node.restart() + post = node.safe_psql( + "SELECT restart_lsn from pg_replication_slots " + "WHERE slot_name = '{}';".format(logical_slot) + ) + assert pre == post, "logical slot advance persists across restarts" + assert ( + node.safe_psql( + "SELECT total_bytes > 0, stats_reset IS NULL " + "FROM pg_stat_replication_slots WHERE slot_name = 'test_slot'" + ) + == "t|t" + ), "Total bytes is > 0 and stats_reset is NULL for slot 'test_slot'." + node.safe_psql("SELECT pg_stat_reset_replication_slot('test_slot')") + reset1 = node.safe_psql( + "SELECT stats_reset FROM pg_stat_replication_slots " + "WHERE slot_name = 'test_slot'" + ) + node.safe_psql("SELECT pg_stat_reset_replication_slot('test_slot')") + assert ( + node.safe_psql( + "SELECT stats_reset > '{}'::timestamptz, total_bytes = 0 " + "FROM pg_stat_replication_slots WHERE slot_name = 'test_slot'".format( + reset1 + ) + ) + == "t|t" + ), "reset timestamp later after second reset and total_bytes 0" + assert ( + node.safe_psql( + "SELECT stats_reset IS NULL FROM pg_stat_replication_slots " + "WHERE slot_name = 'logical_slot'" + ) + == "t" + ), "Stats_reset is NULL for slot 'logical_slot' before reset." 
+ reset1 = node.safe_psql( + "SELECT stats_reset FROM pg_stat_replication_slots " + "WHERE slot_name = 'test_slot'" + ) + node.safe_psql("SELECT pg_stat_reset_replication_slot(NULL)") + assert ( + node.safe_psql( + "SELECT stats_reset IS NOT NULL FROM pg_stat_replication_slots " + "WHERE slot_name = 'logical_slot'" + ) + == "t" + ), "Stats_reset is not NULL for slot 'logical_slot' after reset all." + assert ( + node.safe_psql( + "SELECT stats_reset > '{}'::timestamptz FROM pg_stat_replication_slots " + "WHERE slot_name = 'test_slot'".format(reset1) + ) + == "t" + ), "reset timestamp later after resetting stats again." diff --git a/src/test/recovery/pyt/test_007_sync_rep.py b/src/test/recovery/pyt/test_007_sync_rep.py new file mode 100644 index 0000000000000..cdda8c91329a9 --- /dev/null +++ b/src/test/recovery/pyt/test_007_sync_rep.py @@ -0,0 +1,131 @@ +# Copyright (c) 2021-2026, PostgreSQL Global Development Group + +"""Port of src/test/recovery/t/007_sync_rep.pl. + +Minimal test of synchronous replication sync_state transitions. +""" + +# Query checking sync_priority and sync_state of each standby. 
+_CHECK_SQL = ( + "SELECT application_name, sync_priority, sync_state FROM pg_stat_replication " + "ORDER BY application_name;" +) + + +def _test_sync_state(primary, expected, msg, setting=None): + if setting is not None: + primary.safe_psql( + "ALTER SYSTEM SET synchronous_standby_names = '{}';".format(setting) + ) + primary.reload() + assert primary.poll_query_until(_CHECK_SQL, expected=expected), msg + + +def _start_standby_and_wait(primary, standby): + standby.start() + assert primary.poll_query_until( + "SELECT count(1) = 1 FROM pg_stat_replication " + "WHERE application_name = '{}'".format(standby.name) + ), 'standby "{}" registered'.format(standby.name) + + +def test_sync_rep(create_pg): + """sync_state is determined correctly across synchronous_standby_names.""" + primary = create_pg("primary", allows_streaming=True) + backup_name = "primary_backup" + primary.backup(backup_name) + + def standby(name): + return create_pg( + name, from_backup=(primary, backup_name), has_streaming=True, start=False + ) + + standby1 = standby("standby1") + _start_standby_and_wait(primary, standby1) + standby2 = standby("standby2") + _start_standby_and_wait(primary, standby2) + standby3 = standby("standby3") + _start_standby_and_wait(primary, standby3) + + _test_sync_state( + primary, + "standby1|1|sync\nstandby2|2|potential\nstandby3|0|async", + "old syntax of synchronous_standby_names", + "standby1,standby2", + ) + _test_sync_state( + primary, + "standby1|1|sync\nstandby2|1|potential\nstandby3|1|potential", + "asterisk in synchronous_standby_names", + "*", + ) + + # Rearrange the order of standbys in the WalSnd array. 
+ standby1.stop() + standby2.stop() + standby3.stop() + _start_standby_and_wait(primary, standby2) + _start_standby_and_wait(primary, standby3) + + _test_sync_state( + primary, + "standby2|2|sync\nstandby3|3|sync", + "2 synchronous standbys", + "2(standby1,standby2,standby3)", + ) + + _start_standby_and_wait(primary, standby1) + + standby4 = standby("standby4") + standby4.start() + + _test_sync_state( + primary, + "standby1|1|sync\nstandby2|2|sync\nstandby3|3|potential\nstandby4|0|async", + "2 sync, 1 potential, and 1 async", + ) + _test_sync_state( + primary, + "standby1|0|async\nstandby2|4|sync\nstandby3|3|sync\nstandby4|1|sync", + "num_sync exceeds the num of potential sync standbys", + "6(standby4,standby0,standby3,standby2)", + ) + _test_sync_state( + primary, + "standby1|1|sync\nstandby2|2|sync\nstandby3|2|potential\nstandby4|2|potential", + "asterisk before another standby name", + "2(standby1,*,standby2)", + ) + _test_sync_state( + primary, + "standby1|1|potential\nstandby2|1|sync\nstandby3|1|sync\nstandby4|1|potential", + "multiple standbys having the same priority are chosen as sync", + "2(*)", + ) + + standby3.stop() + _test_sync_state( + primary, + "standby1|1|sync\nstandby2|1|sync\nstandby4|1|potential", + "potential standby found earlier in array is promoted to sync", + ) + _test_sync_state( + primary, + "standby1|1|sync\nstandby2|2|sync\nstandby4|0|async", + "priority-based sync replication specified by FIRST keyword", + "FIRST 2(standby1, standby2)", + ) + _test_sync_state( + primary, + "standby1|1|quorum\nstandby2|1|quorum\nstandby4|0|async", + "2 quorum and 1 async", + "ANY 2(standby1, standby2)", + ) + + standby3.start() + _test_sync_state( + primary, + "standby1|1|quorum\nstandby2|1|quorum\nstandby3|1|quorum\nstandby4|1|quorum", + "all standbys are considered as candidates for quorum sync standbys", + "ANY 2(*)", + ) diff --git a/src/test/recovery/pyt/test_008_fsm_truncation.py b/src/test/recovery/pyt/test_008_fsm_truncation.py new file mode 
100644 index 0000000000000..b8add0887d31b --- /dev/null +++ b/src/test/recovery/pyt/test_008_fsm_truncation.py @@ -0,0 +1,67 @@ +# Copyright (c) 2021-2026, PostgreSQL Global Development Group + +"""Port of src/test/recovery/t/008_fsm_truncation.pl. + +FSM-driven INSERT just after truncation clears FSM slots for removed blocks; +the FSM must not return a page that no longer exists. +""" + + +def test_fsm_truncation(create_pg): + """An INSERT succeeds on a promoted standby with a truncated relation FSM.""" + primary = create_pg("primary", allows_streaming=True, start=False) + primary.append_conf( + "wal_log_hints = on\nmax_prepared_transactions = 5\nautovacuum = off" + ) + primary.start() + + primary.backup("primary_backup") + standby = create_pg( + "standby", from_backup=(primary, "primary_backup"), has_streaming=True + ) + + primary.safe_psql( + "create table testtab (a int, b char(100));\n" + "insert into testtab select generate_series(1,1000), 'foo';\n" + "insert into testtab select generate_series(1,1000), 'foo';\n" + "delete from testtab where ctid > '(8,0)';" + ) + + # Take a lock on the table to prevent the following vacuum from truncating. + primary.safe_psql( + """begin; +lock table testtab in row share mode; +prepare transaction 'p1';""" + ) + + # Vacuum, update FSM without truncation. + primary.safe_psql("vacuum verbose testtab") + primary.safe_psql("checkpoint") + + # More insert/deletes and another vacuum to ensure full-page writes. + primary.safe_psql( + "insert into testtab select generate_series(1,1000), 'foo';\n" + "delete from testtab where ctid > '(8,0)';\n" + "vacuum verbose testtab;" + ) + + # Ensure all buffers are clean on the standby. + standby.safe_psql("checkpoint") + + # Release the lock; vacuum again, which should lead to truncation. 
+ primary.safe_psql("rollback prepared 'p1';\nvacuum verbose testtab;") + primary.safe_psql("checkpoint") + + until_lsn = primary.safe_psql("SELECT pg_current_wal_lsn();") + assert standby.poll_query_until( + "SELECT '{}'::pg_lsn <= pg_last_wal_replay_lsn()".format(until_lsn) + ), "standby to catch up" + + standby.promote() + standby.safe_psql("checkpoint") + + # Restart to discard the in-memory copy of the FSM. + standby.restart() + + # INSERT should work on the standby. + standby.safe_psql("insert into testtab select generate_series(1,1000), 'foo';") diff --git a/src/test/recovery/pyt/test_009_twophase.py b/src/test/recovery/pyt/test_009_twophase.py new file mode 100644 index 0000000000000..bb715f344d2d0 --- /dev/null +++ b/src/test/recovery/pyt/test_009_twophase.py @@ -0,0 +1,287 @@ +# Copyright (c) 2017-2026, PostgreSQL Global Development Group + +"""Port of src/test/recovery/t/009_twophase.pl. + +Verifies prepared (two-phase) transactions survive restarts, immediate +teardowns, and primary/standby role swaps. Two synchronous-replication nodes +(london and paris) trade the primary role repeatedly; at each step a 2PC +transaction is prepared, then committed/rolled back after a restart, teardown, +or promotion, and the standby's shared-memory 2PC state and MVCC visibility are +checked. At the end the full t_009_tbl contents are validated on both nodes. 
+""" + + +class _Roles: + """Tracks which node is currently primary/standby (they swap).""" + + def __init__(self, primary, standby): + self.primary = primary + self.standby = standby + + def swap(self): + self.primary, self.standby = self.standby, self.primary + + @property + def name(self): + return self.primary.name + + +def _configure_and_reload(node, parameter): + node.append_conf("\n{}\n".format(parameter)) + out = node.psql_capture("SELECT pg_reload_conf()") + assert out.stdout.strip() == "t", "reload node {} with {}".format( + node.name, parameter + ) + + +def _issue(node, body): + """Run a multi-statement block with on_error_stop off; return rc.""" + return node.psql_capture(body, on_error_stop=False).rc + + +def test_009_twophase(create_pg): + """Prepared transactions survive restart/teardown/promotion role swaps.""" + london = create_pg("london", allows_streaming=True, start=False) + london.append_conf("\nmax_prepared_transactions = 10\nlog_checkpoints = true\n") + london.start() + london.backup("london_backup") + paris = create_pg( + "paris", from_backup=(london, "london_backup"), has_streaming=True, start=False + ) + paris.append_conf("\nsubtransaction_buffers = 32\n") + paris.start() + _configure_and_reload(london, "synchronous_standby_names = 'paris'") + _configure_and_reload(paris, "synchronous_standby_names = 'london'") + roles = _Roles(london, paris) + _restart_teardown_phase(roles) + _standby_cleanup_phase(roles) + _promotion_phase(roles, london, paris) + _final_checks(roles) + + +def _prep(name, lo, hi, nm): + return ( + "BEGIN;\nINSERT INTO t_009_tbl VALUES ({lo}, 'issued to {nm}');\n" + "SAVEPOINT s1;\n" + "INSERT INTO t_009_tbl VALUES ({hi}, 'issued to {nm}');\n" + "PREPARE TRANSACTION '{name}';".format(lo=lo, hi=hi, nm=nm, name=name) + ) + + +def _restart_teardown_phase(roles): + p = roles.primary + nm = roles.name + p.psql_capture("CREATE TABLE t_009_tbl (id int, msg text)") + _issue(p, _prep("xact_009_1", 1, 2, nm) + "\n" + 
_prep("xact_009_2", 3, 4, nm)) + p.stop() + p.start() + assert _issue(p, "COMMIT PREPARED 'xact_009_1'") == 0, "commit after restart" + assert _issue(p, "ROLLBACK PREPARED 'xact_009_2'") == 0, "rollback after restart" + _issue( + p, + "CHECKPOINT;\n" + + _prep("xact_009_3", 5, 6, nm) + + "\n" + + _prep("xact_009_4", 7, 8, nm), + ) + p.teardown_node() + p.start() + assert _issue(p, "COMMIT PREPARED 'xact_009_3'") == 0, "commit after teardown" + assert _issue(p, "ROLLBACK PREPARED 'xact_009_4'") == 0, "rollback after teardown" + _issue( + p, + "CHECKPOINT;\n" + + _prep("xact_009_5", 9, 10, nm) + + "\nCOMMIT PREPARED 'xact_009_5';\n" + + _prep("xact_009_5", 11, 12, nm), + ) + p.teardown_node() + p.start() + assert _issue(p, "COMMIT PREPARED 'xact_009_5'") == 0, "same GID replay" + _issue(p, _prep("xact_009_6", 13, 14, nm) + "\nCOMMIT PREPARED 'xact_009_6';") + p.teardown_node() + p.start() + assert _issue(p, _prep("xact_009_7", 15, 16, nm)) == 0, "2PC shmem cleanup" + p.psql_capture("COMMIT PREPARED 'xact_009_7'") + + +def _standby_cleanup_phase(roles): + p, s = roles.primary, roles.standby + nm = roles.name + _issue(p, _prep("xact_009_8", 17, 18, nm) + "\nCOMMIT PREPARED 'xact_009_8';") + assert ( + s.psql_capture("SELECT count(*) FROM pg_prepared_xacts").stdout.strip() == "0" + ), "standby shmem cleanup without checkpoint" + _issue(p, _prep("xact_009_9", 19, 20, nm)) + s.psql_capture("CHECKPOINT") + p.psql_capture("COMMIT PREPARED 'xact_009_9'") + assert ( + s.psql_capture("SELECT count(*) FROM pg_prepared_xacts").stdout.strip() == "0" + ), "standby shmem cleanup after checkpoint" + + +def _promotion_phase(roles, london, paris): + p = roles.primary + nm = roles.name + _issue(p, _prep("xact_009_10", 21, 22, nm)) + p.stop() + roles.standby.promote() + roles.swap() # paris primary, london standby + assert ( + _issue( + roles.primary, "SET synchronous_commit = off; COMMIT PREPARED 'xact_009_10'" + ) + == 0 + ), "restore prepared xact on promoted standby" + 
roles.standby.enable_streaming(roles.primary) + roles.standby.start() + nm = roles.name + _issue(roles.primary, _prep("xact_009_11", 23, 24, nm)) + roles.primary.stop() + roles.standby.restart() + roles.standby.promote() + roles.swap() # london primary, paris standby + assert ( + roles.primary.psql_capture( + "SELECT count(*) FROM pg_prepared_xacts" + ).stdout.strip() + == "1" + ), "restore prepared xacts from files with primary down" + roles.standby.enable_streaming(roles.primary) + roles.standby.start() + roles.primary.psql_capture("COMMIT PREPARED 'xact_009_11'") + nm = roles.name + _issue(roles.primary, _prep("xact_009_12", 25, 26, nm)) + roles.primary.stop() + roles.standby.teardown_node() + roles.standby.start() + roles.standby.promote() + roles.swap() # paris primary, london standby + assert ( + roles.primary.psql_capture( + "SELECT count(*) FROM pg_prepared_xacts" + ).stdout.strip() + == "1" + ), "restore prepared xacts from records with primary down" + roles.standby.enable_streaming(roles.primary) + roles.standby.start() + roles.primary.psql_capture("COMMIT PREPARED 'xact_009_12'") + _standby_mvcc(roles) + + +def _standby_mvcc(roles): + p, s = roles.primary, roles.standby + nm = roles.name + p.psql_capture( + "SET synchronous_commit='remote_apply';\n" + "CREATE TABLE t_009_tbl_standby_mvcc (id int, msg text);\nBEGIN;\n" + "INSERT INTO t_009_tbl_standby_mvcc VALUES (1, 'issued to {n}');\n" + "SAVEPOINT s1;\n" + "INSERT INTO t_009_tbl_standby_mvcc VALUES (2, 'issued to {n}');\n" + "PREPARE TRANSACTION 'xact_009_standby_mvcc';".format(n=nm) + ) + p.stop() + s.restart() + sess = s.background_psql("postgres", on_error_stop=True) + sess.query_safe("BEGIN ISOLATION LEVEL REPEATABLE READ") + assert ( + sess.query_safe("SELECT count(*) FROM t_009_tbl_standby_mvcc").strip() == "0" + ), "prepared xact not visible in standby before commit" + p.start() + p.psql_capture( + "SET synchronous_commit='remote_apply';\n" + "COMMIT PREPARED 'xact_009_standby_mvcc';" + ) + 
assert ( + sess.query_safe("SELECT count(*) FROM t_009_tbl_standby_mvcc").strip() == "0" + ), "committed prepared xact not visible to old snapshot" + sess.query_safe("COMMIT") + assert ( + sess.query_safe("SELECT count(*) FROM t_009_tbl_standby_mvcc").strip() == "2" + ), "committed prepared xact visible to new snapshot" + sess.quit() + _ddl_phase(roles) + + +def _ddl_phase(roles): + p, s = roles.primary, roles.standby + nm = roles.name + p.psql_capture( + "BEGIN;\nCREATE TABLE t_009_tbl2 (id int, msg text);\nSAVEPOINT s1;\n" + "INSERT INTO t_009_tbl2 VALUES (27, 'issued to {n}');\n" + "PREPARE TRANSACTION 'xact_009_13';\nCHECKPOINT;\n" + "COMMIT PREPARED 'xact_009_13';".format(n=nm) + ) + lsn = p.safe_psql("SELECT pg_current_wal_lsn()") + assert s.poll_query_until( + "SELECT '{}'::pg_lsn <= pg_last_wal_replay_lsn()".format(lsn) + ), "Timed out while waiting for standby to catch up" + assert ( + s.psql_capture("SELECT count(*) FROM t_009_tbl2").stdout.strip() == "1" + ), "replay prepared xact with DDL" + _issue( + p, + "BEGIN;\nCREATE TABLE t_009_tbl3 (id int, msg text);\nSAVEPOINT s1;\n" + "INSERT INTO t_009_tbl3 VALUES (28, 'issued to {n}');\n" + "PREPARE TRANSACTION 'xact_009_14';\n".format(n=nm) + + _ddl_prep("xact_009_15", "t_009_tbl4", 29, nm), + ) + p.teardown_node() + p.start() + assert _issue(p, "COMMIT PREPARED 'xact_009_14'") == 0, "commit DDL after teardown" + assert ( + _issue(p, "ROLLBACK PREPARED 'xact_009_15'") == 0 + ), "rollback DDL after teardown" + _issue( + p, + _ddl_prep("xact_009_16", "t_009_tbl5", 30, nm) + + _ddl_prep("xact_009_17", "t_009_tbl6", 31, nm), + ) + p.stop() + p.start() + assert _issue(p, "COMMIT PREPARED 'xact_009_16'") == 0, "commit DDL after restart" + assert ( + _issue(p, "ROLLBACK PREPARED 'xact_009_17'") == 0 + ), "rollback DDL after restart" + + +def _ddl_prep(name, table, val, nm): + return ( + "BEGIN;\nCREATE TABLE {t} (id int, msg text);\nSAVEPOINT s1;\n" + "INSERT INTO {t} VALUES ({v}, 'issued to {n}');\n" + "PREPARE 
TRANSACTION '{name}';\n".format(t=table, v=val, n=nm, name=name) + ) + + +_EXPECTED_TBL = ( + "1|issued to london\n2|issued to london\n5|issued to london\n" + "6|issued to london\n9|issued to london\n10|issued to london\n" + "11|issued to london\n12|issued to london\n13|issued to london\n" + "14|issued to london\n15|issued to london\n16|issued to london\n" + "17|issued to london\n18|issued to london\n19|issued to london\n" + "20|issued to london\n21|issued to london\n22|issued to london\n" + "23|issued to paris\n24|issued to paris\n25|issued to london\n" + "26|issued to london" +) + + +def _final_checks(roles): + p, s = roles.primary, roles.standby + assert ( + p.psql_capture("SELECT count(*) FROM pg_prepared_xacts").stdout.strip() == "0" + ), "no uncommitted prepared xacts on primary" + assert ( + p.psql_capture("SELECT * FROM t_009_tbl ORDER BY id").stdout.strip() + == _EXPECTED_TBL + ), "expected t_009_tbl data on primary" + assert ( + p.psql_capture("SELECT * FROM t_009_tbl2").stdout.strip() + == "27|issued to paris" + ), "expected t_009_tbl2 data on primary" + assert ( + s.psql_capture("SELECT count(*) FROM pg_prepared_xacts").stdout.strip() == "0" + ), "no uncommitted prepared xacts on standby" + assert ( + s.psql_capture("SELECT * FROM t_009_tbl ORDER BY id").stdout.strip() + == _EXPECTED_TBL + ), "expected t_009_tbl data on standby" diff --git a/src/test/recovery/pyt/test_010_logical_decoding_timelines.py b/src/test/recovery/pyt/test_010_logical_decoding_timelines.py new file mode 100644 index 0000000000000..4945ffb0cdc26 --- /dev/null +++ b/src/test/recovery/pyt/test_010_logical_decoding_timelines.py @@ -0,0 +1,137 @@ +# Copyright (c) 2017-2026, PostgreSQL Global Development Group + +"""Port of src/test/recovery/t/010_logical_decoding_timelines.pl. 
+ +Logical replication slots follow timeline changes across a filesystem-level +base backup and a standby promotion: a slot created before the backup is usable +on the promoted replica (decoding data written before, after, and post-failover) +while a slot created after the backup never reaches the replica; a dropped +database's slot is removed on the standby; and the physical slot's xmin / +catalog_xmin are tracked. Output is cross-checked via pg_recvlogical. +""" + +import re + +import pypg + +_EXPECTED = ( + "BEGIN\n" + "table public.decoding: INSERT: blah[text]:'beforebb'\n" + "COMMIT\n" + "BEGIN\n" + "table public.decoding: INSERT: blah[text]:'afterbb'\n" + "COMMIT\n" + "BEGIN\n" + "table public.decoding: INSERT: blah[text]:'after failover'\n" + "COMMIT" +) + + +def test_010_logical_decoding_timelines(create_pg): + """Logical slots follow timelines across fs backup and standby promotion.""" + primary = create_pg( + "primary", allows_streaming=True, has_archiving=True, start=False + ) + primary.append_conf( + "\nwal_level = 'logical'\nmax_replication_slots = 3\nmax_wal_senders = 2\n" + "log_min_messages = 'debug2'\nhot_standby_feedback = on\n" + "wal_receiver_status_interval = 1\n" + ) + primary.dump_info() + primary.start() + primary.safe_psql( + "SELECT pg_create_logical_replication_slot('before_basebackup', " + "'test_decoding');" + ) + primary.safe_psql("CREATE TABLE decoding(blah text);") + primary.safe_psql("INSERT INTO decoding(blah) VALUES ('beforebb');") + primary.safe_psql("CREATE DATABASE dropme;") + primary.safe_psql( + "SELECT pg_create_logical_replication_slot('dropme_slot', 'test_decoding');", + dbname="dropme", + ) + primary.safe_psql("CHECKPOINT;") + backup_name = "b1" + primary.stop() + primary.backup_fs_cold(backup_name) + primary.start() + primary.safe_psql("SELECT pg_create_physical_replication_slot('phys_slot');") + replica = create_pg( + "replica", + from_backup=(primary, backup_name), + has_streaming=True, + has_restoring=True, + 
start=False, + ) + replica.append_conf("primary_slot_name = 'phys_slot'") + replica.start() + assert ( + primary.psql_capture("DROP DATABASE dropme").rc == 0 + ), "dropped DB with logical slot OK on primary" + primary.wait_for_catchup(replica) + assert ( + replica.safe_psql("SELECT 1 FROM pg_database WHERE datname = 'dropme'") == "" + ), "dropped DB dropme on standby" + assert ( + replica.slot("dropme_slot")["plugin"] == "" + ), "logical slot was actually dropped on standby" + primary.safe_psql( + "SELECT pg_create_logical_replication_slot('after_basebackup', " + "'test_decoding');" + ) + primary.safe_psql("INSERT INTO decoding(blah) VALUES ('afterbb');") + primary.safe_psql("CHECKPOINT;") + assert ( + replica.safe_psql( + "SELECT slot_name FROM pg_replication_slots ORDER BY slot_name" + ) + == "before_basebackup" + ), "Expected to find only slot before_basebackup on replica" + assert primary.poll_query_until( + "SELECT catalog_xmin IS NOT NULL FROM pg_replication_slots " + "WHERE slot_name = 'phys_slot'" + ), "slot's catalog_xmin never became set" + phys_slot = primary.slot("phys_slot") + assert phys_slot["xmin"] != "", "xmin assigned on physical slot of primary" + assert ( + phys_slot["catalog_xmin"] != "" + ), "catalog_xmin assigned on physical slot of primary" + assert int(phys_slot["xmin"]) >= int( + phys_slot["catalog_xmin"] + ), "xmin on physical slot must not be lower than catalog_xmin" + primary.safe_psql("CHECKPOINT") + primary.wait_for_catchup(replica, "write") + primary.stop("immediate") + replica.promote() + replica.safe_psql("INSERT INTO decoding(blah) VALUES ('after failover');") + res = replica.psql_capture( + "SELECT data FROM pg_logical_slot_peek_changes('after_basebackup', NULL, " + "NULL, 'include-xids', '0', 'skip-empty-xacts', '1');" + ) + assert res.rc == 3, "replaying from after_basebackup slot fails" + assert re.search( + r'replication slot "after_basebackup" does not exist', res.stderr + ), "after_basebackup slot missing" + res = 
replica.psql_capture( + "SELECT data FROM pg_logical_slot_peek_changes('before_basebackup', NULL, " + "NULL, 'include-xids', '0', 'skip-empty-xacts', '1');", + timeout=pypg.test_timeout_default(), + ) + assert res.rc == 0, "replay from slot before_basebackup succeeds" + assert res.stdout == _EXPECTED, "decoded expected data from slot before_basebackup" + assert res.stderr == "", "replay from slot before_basebackup produces no stderr" + endpos = replica.safe_psql( + "SELECT lsn FROM pg_logical_slot_peek_changes('before_basebackup', NULL, " + "NULL) ORDER BY lsn DESC LIMIT 1;" + ) + stdout = replica.pg_recvlogical_upto( + "postgres", + "before_basebackup", + endpos, + pypg.test_timeout_default(), + options={"include-xids": "0", "skip-empty-xacts": "1"}, + ) + assert ( + stdout.rstrip("\n") == _EXPECTED + ), "got same output from walsender via pg_recvlogical on before_basebackup" + replica.teardown_node() diff --git a/src/test/recovery/pyt/test_012_subtransactions.py b/src/test/recovery/pyt/test_012_subtransactions.py new file mode 100644 index 0000000000000..de47c50d695aa --- /dev/null +++ b/src/test/recovery/pyt/test_012_subtransactions.py @@ -0,0 +1,111 @@ +# Copyright (c) 2017-2026, PostgreSQL Global Development Group + +"""Port of src/test/recovery/t/012_subtransactions.pl. + +Hot-standby handling of large subtransaction trees and prepared transactions +across restarts and promotions: nextXid is advanced correctly past prepared +subtransactions, a committed 127-deep subxid tree is visible on the standby and +survives promotion, and a PGPROC_MAX_CACHED_SUBXIDS+ prepared transaction is +restored and resolvable (commit/rollback) on a promoted standby. 
+""" + +_FUNC = ( + "CREATE OR REPLACE FUNCTION hs_subxids (n integer)\n" + "RETURNS void\n" + "LANGUAGE plpgsql\n" + "AS $$\n" + "BEGIN\n" + " IF n <= 0 THEN RETURN; END IF;\n" + " INSERT INTO t_012_tbl VALUES (n);\n" + " PERFORM hs_subxids(n - 1);\n" + " RETURN;\n" + "EXCEPTION WHEN raise_exception THEN NULL; END;\n" + "$$;" +) +_SUM = "SELECT coalesce(sum(id),-1) FROM t_012_tbl" + + +def test_012_subtransactions(create_pg): + """Subtransaction/prepared-xact visibility across restart, promotion, swap.""" + primary = create_pg("primary", allows_streaming=True, start=False) + primary.append_conf( + "\n\tmax_prepared_transactions = 10\n\tlog_checkpoints = true\n" + ) + primary.start() + primary.backup("primary_backup") + primary.psql_capture("CREATE TABLE t_012_tbl (id int)") + standby = create_pg( + "standby", + from_backup=(primary, "primary_backup"), + has_streaming=True, + start=False, + ) + standby.start() + primary.append_conf("\n\tsynchronous_standby_names = '*'\n") + primary.psql_capture("SELECT pg_reload_conf()") + primary.psql_capture( + "\n\tBEGIN;\n\tDELETE FROM t_012_tbl;\n\tINSERT INTO t_012_tbl VALUES (43);\n" + + "".join( + "\tSAVEPOINT s{n};\n\tINSERT INTO t_012_tbl VALUES (43);\n".format(n=n) + for n in range(1, 6) + ) + + "\tPREPARE TRANSACTION 'xact_012_1';\n\tCHECKPOINT;" + ) + primary.stop() + primary.start() + primary.psql_capture( + "\n\tBEGIN;\n\tINSERT INTO t_012_tbl VALUES (142);\n\tROLLBACK;\n" + "\tCOMMIT PREPARED 'xact_012_1';" + ) + assert ( + primary.psql_capture("SELECT count(*) FROM t_012_tbl").stdout == "6" + ), "Check nextXid handling for prepared subtransactions" + primary.psql_capture("DELETE FROM t_012_tbl") + primary.psql_capture(_FUNC) + primary.psql_capture("\n\tBEGIN;\n\tSELECT hs_subxids(127);\n\tCOMMIT;") + primary.wait_for_catchup(standby) + assert standby.psql_capture(_SUM).stdout == "8128", "Visible" + primary.stop() + standby.promote() + assert standby.psql_capture(_SUM).stdout == "8128", "Visible" + primary, standby 
= standby, primary + standby.enable_streaming(primary) + standby.start() + assert standby.psql_capture(_SUM).stdout == "8128", "Visible" + primary.psql_capture("DELETE FROM t_012_tbl") + primary.psql_capture(_FUNC) + primary.psql_capture( + "\n\tBEGIN;\n\tSELECT hs_subxids(127);\n\tPREPARE TRANSACTION 'xact_012_1';" + ) + primary.wait_for_catchup(standby) + assert standby.psql_capture(_SUM).stdout == "-1", "Not visible" + primary.stop() + standby.promote() + assert standby.psql_capture(_SUM).stdout == "-1", "Not visible" + primary, standby = standby, primary + standby.enable_streaming(primary) + standby.start() + assert primary.psql_capture("COMMIT PREPARED 'xact_012_1'").rc == 0, ( + "Restore of PGPROC_MAX_CACHED_SUBXIDS+ prepared transaction on promoted " + "standby" + ) + assert primary.psql_capture(_SUM).stdout == "8128", "Visible" + primary.psql_capture("DELETE FROM t_012_tbl") + primary.psql_capture( + "\n\tBEGIN;\n\tSELECT hs_subxids(201);\n\tPREPARE TRANSACTION 'xact_012_1';" + ) + primary.wait_for_catchup(standby) + assert standby.psql_capture(_SUM).stdout == "-1", "Not visible" + primary.stop() + standby.promote() + assert standby.psql_capture(_SUM).stdout == "-1", "Not visible" + primary, standby = standby, primary + standby.enable_streaming(primary) + standby.start() + assert primary.psql_capture("ROLLBACK PREPARED 'xact_012_1'").rc == 0, ( + "Rollback of PGPROC_MAX_CACHED_SUBXIDS+ prepared transaction on promoted " + "standby" + ) + assert primary.psql_capture(_SUM).stdout == "-1", "Not visible" + primary.stop() + standby.stop() diff --git a/src/test/recovery/pyt/test_013_crash_restart.py b/src/test/recovery/pyt/test_013_crash_restart.py new file mode 100644 index 0000000000000..8ee7f09ae222a --- /dev/null +++ b/src/test/recovery/pyt/test_013_crash_restart.py @@ -0,0 +1,150 @@ +# Copyright (c) 2021-2026, PostgreSQL Global Development Group + +"""Port of src/test/recovery/t/013_crash_restart.pl. + +Restarts of postgres due to crashes of a subprocess. 
Two background psql +sessions are used: one whose backend is killed (triggering crash-restart) and +one long-running monitor that detects when the crash happened. +""" + +# stderr patterns indicating the killed backend's connection went away. +_KILLME_SIGQUIT = ( + r"WARNING: terminating connection because of unexpected SIGQUIT signal" + r"|server closed the connection unexpectedly" + r"|connection to server was lost|could not send data to server" +) +_KILLME_SIGKILL = ( + r"server closed the connection unexpectedly" + r"|connection to server was lost|could not send data to server" +) +_MONITOR_DIED = ( + r"WARNING: terminating connection because of crash of another server process" + r"|server closed the connection unexpectedly" + r"|connection to server was lost|could not send data to server" +) +_DIGIT_LINE = r"[0-9]+[\r\n]" + + +def _monitor_connect(monitor): + monitor.query_until( + r"psql-connected", "SELECT $$psql-connected$$;\nSELECT pg_sleep(3600);\n" + ) + + +def _reconnect(node): + # poll until a fresh psql with empty input produces empty output: the + # server is accepting connections again. 
+ assert node.poll_query_until("", expected=""), "reconnected after crash" + + +def _crash_cycle(pg_bin, node, killme, monitor, signal, killme_rx, insert_sql): + """Acquire the backend pid, kill it with signal, and detect the restart.""" + pid = killme.query_until(_DIGIT_LINE, "SELECT pg_backend_pid();\n").strip() + killme.query_until( + r"in-progress-before-sig" + ("quit" if signal == "QUIT" else "kill"), insert_sql + ) + _monitor_connect(monitor) + + pg_bin.command_ok( + ["pg_ctl", "kill", signal, pid], "killed process with SIG{}".format(signal) + ) + + killme.wait_for_stderr(killme_rx, "SELECT 1;\n") + killme.finish() + monitor.wait_for_stderr(_MONITOR_DIED) + monitor.finish() + _reconnect(node) + + +def test_crash_restart(pg_bin, create_pg): + """A crashing backend triggers crash-restart; committed rows survive.""" + node = create_pg("primary", allows_streaming=True, start=False) + node.append_conf( + "shared_preload_libraries = 'pg_stat_statements'\n" + "pg_stat_statements.max = 50000\n" + "compute_query_id = 'regress'" + ) + node.start() + + node.safe_psql( + "ALTER SYSTEM SET restart_after_crash = 1;\n" + "ALTER SYSTEM SET log_connections = receipt;\n" + "SELECT pg_reload_conf();" + ) + stats_reset = node.safe_psql( + "CREATE EXTENSION pg_stat_statements;\n" + "SELECT stats_reset FROM pg_stat_statements_info;" + ) + + killme = node.background_psql() + monitor = node.background_psql() + + # SIGQUIT: the backend exits after emitting an error. + killme.query_until( + _DIGIT_LINE, + "CREATE TABLE alive(status text);\n" + "INSERT INTO alive VALUES($$committed-before-sigquit$$);\n" + "SELECT pg_backend_pid();\n", + ) + killme.clear() + _crash_cycle( + pg_bin, + node, + killme, + monitor, + "QUIT", + _KILLME_SIGQUIT, + "BEGIN;\n" + "INSERT INTO alive VALUES($$in-progress-before-sigquit$$) RETURNING status;\n", + ) + + # Restart the psql sessions now that the crash cycle finished. 
+ killme.restart() + monitor.restart() + + stats_reset_after = node.safe_psql( + "SELECT stats_reset FROM pg_stat_statements_info" + ) + assert stats_reset != stats_reset_after, "pg_stat_statements was reset by restart" + + # SIGKILL: the backend exits without being able to emit an error. + _crash_cycle( + pg_bin, + node, + killme, + monitor, + "KILL", + _KILLME_SIGKILL, + "INSERT INTO alive VALUES($$committed-before-sigkill$$) RETURNING status;\n" + "BEGIN;\n" + "INSERT INTO alive VALUES($$in-progress-before-sigkill$$) RETURNING status;\n", + ) + + assert node.safe_psql("SELECT * FROM alive") == ( + "committed-before-sigquit\ncommitted-before-sigkill" + ), "data survived" + assert ( + node.safe_psql( + "INSERT INTO alive VALUES($$before-orderly-restart$$) RETURNING status" + ) + == "before-orderly-restart" + ), "can still write after crash restart" + + assert node.poll_query_until( + "SELECT count(*) = 1 FROM pg_stat_activity " + "WHERE backend_type = 'logical replication launcher'" + ), "logical replication launcher restarted after crash" + + # An orderly restart still works. + node.restart() + assert node.safe_psql("SELECT * FROM alive") == ( + "committed-before-sigquit\ncommitted-before-sigkill\nbefore-orderly-restart" + ), "data survived" + assert ( + node.safe_psql( + "INSERT INTO alive VALUES($$after-orderly-restart$$) RETURNING status" + ) + == "after-orderly-restart" + ), "can still write after orderly restart" + + node.stop() diff --git a/src/test/recovery/pyt/test_014_unlogged_reinit.py b/src/test/recovery/pyt/test_014_unlogged_reinit.py new file mode 100644 index 0000000000000..7ccf85f88b27d --- /dev/null +++ b/src/test/recovery/pyt/test_014_unlogged_reinit.py @@ -0,0 +1,96 @@ +# Copyright (c) 2021-2026, PostgreSQL Global Development Group + +"""Port of src/test/recovery/t/014_unlogged_reinit.pl. + +Unlogged tables are properly reinitialized after a crash. 
+""" + + +def _exists(node, relpath): + return (node.datadir / relpath).is_file() + + +def test_unlogged_reinit(create_pg, tmp_path): + """Unlogged relation forks are reinitialized from the init fork on crash.""" + node = create_pg("main") + + node.safe_psql("CREATE UNLOGGED TABLE base_unlogged (id int)") + node.safe_psql("CREATE UNLOGGED SEQUENCE seq_unlogged") + + base = node.safe_psql("select pg_relation_filepath('base_unlogged')") + seq = node.safe_psql("select pg_relation_filepath('seq_unlogged')") + + # Main and init forks should exist. + assert _exists(node, base + "_init"), "table init fork exists" + assert _exists(node, base), "table main fork exists" + assert _exists(node, seq + "_init"), "sequence init fork exists" + assert _exists(node, seq), "sequence main fork exists" + + assert node.safe_psql("SELECT nextval('seq_unlogged')") == "1", "sequence nextval" + assert node.safe_psql("SELECT nextval('seq_unlogged')") == "2", "sequence nextval" + + # Unlogged table in a tablespace. + tablespace_dir = tmp_path / "ts1" + tablespace_dir.mkdir() + node.safe_psql("CREATE TABLESPACE ts1 LOCATION '{}'".format(tablespace_dir)) + node.safe_psql("CREATE UNLOGGED TABLE ts1_unlogged (id int) TABLESPACE ts1") + ts1 = node.safe_psql("select pg_relation_filepath('ts1_unlogged')") + assert _exists(node, ts1 + "_init"), "init fork in tablespace exists" + assert _exists(node, ts1), "main fork in tablespace exists" + + # More unlogged sequences for testing. + node.safe_psql("CREATE UNLOGGED SEQUENCE seq_unlogged2") + node.safe_psql("ALTER SEQUENCE seq_unlogged2 INCREMENT 2") + node.safe_psql("SELECT nextval('seq_unlogged2')") + + node.safe_psql( + "CREATE UNLOGGED TABLE tab_seq_unlogged3 " + "(a int GENERATED ALWAYS AS IDENTITY)" + ) + node.safe_psql("TRUNCATE tab_seq_unlogged3 RESTART IDENTITY") + node.safe_psql("INSERT INTO tab_seq_unlogged3 DEFAULT VALUES") + + # Crash the postmaster. + node.stop("immediate") + + # Fake forks that recovery should remove. 
+ (node.datadir / (base + "_vm")).write_text("TEST_VM", encoding="utf-8") + (node.datadir / (base + "_fsm")).write_text("TEST_FSM", encoding="utf-8") + + # Remove main forks to test that they are recopied from init. + (node.datadir / base).unlink() + (node.datadir / seq).unlink() + + (node.datadir / (ts1 + "_vm")).write_text("TEST_VM", encoding="utf-8") + (node.datadir / (ts1 + "_fsm")).write_text("TEST_FSM", encoding="utf-8") + (node.datadir / ts1).unlink() + + node.start() + + assert _exists(node, base + "_init"), "table init fork in base still exists" + assert _exists(node, base), "table main fork in base recreated at startup" + assert not _exists(node, base + "_vm"), "vm fork in base removed at startup" + assert not _exists(node, base + "_fsm"), "fsm fork in base removed at startup" + + assert _exists(node, seq + "_init"), "sequence init fork still exists" + assert _exists(node, seq), "sequence main fork recreated at startup" + + assert ( + node.safe_psql("SELECT nextval('seq_unlogged')") == "1" + ), "nextval after restart" + assert ( + node.safe_psql("SELECT nextval('seq_unlogged')") == "2" + ), "nextval after restart" + + assert _exists(node, ts1 + "_init"), "init fork still exists in tablespace" + assert _exists(node, ts1), "main fork in tablespace recreated at startup" + assert not _exists(node, ts1 + "_vm"), "vm fork in tablespace removed at startup" + assert not _exists(node, ts1 + "_fsm"), "fsm fork in tablespace removed at startup" + + assert node.safe_psql("SELECT nextval('seq_unlogged2')") == "1", "altered nextval" + assert node.safe_psql("SELECT nextval('seq_unlogged2')") == "3", "altered nextval" + + node.safe_psql("INSERT INTO tab_seq_unlogged3 VALUES (DEFAULT), (DEFAULT)") + assert ( + node.safe_psql("SELECT * FROM tab_seq_unlogged3") == "1\n2" + ), "reset sequence nextval after restart" diff --git a/src/test/recovery/pyt/test_015_promotion_pages.py b/src/test/recovery/pyt/test_015_promotion_pages.py new file mode 100644 index 
0000000000000..5c0860f41d235 --- /dev/null +++ b/src/test/recovery/pyt/test_015_promotion_pages.py @@ -0,0 +1,58 @@ +# Copyright (c) 2021-2026, PostgreSQL Global Development Group + +"""Port of src/test/recovery/t/015_promotion_pages.pl. + +Promotion handling with WAL records generated post-promotion before the first +checkpoint, checking for invalid page references against minRecoveryPoint. +""" + + +def test_promotion_pages(create_pg): + """A promoted standby survives crash recovery without invalid page refs.""" + alpha = create_pg("alpha", allows_streaming=True, start=False) + # wal_log_hints=off is important to get invalid page references. + alpha.append_conf("wal_log_hints = off") + alpha.start() + + alpha.backup("bkp") + bravo = create_pg( + "bravo", from_backup=(alpha, "bkp"), has_streaming=True, start=False + ) + bravo.append_conf("checkpoint_timeout=1h") + bravo.start() + + alpha.safe_psql("create table test1 (a int)") + alpha.safe_psql("insert into test1 select generate_series(1, 10000)") + alpha.safe_psql("checkpoint") + # This vacuum sets visibility map bits and creates problematic WAL records. + alpha.safe_psql("vacuum verbose test1") + alpha.wait_for_catchup(bravo) + + # Force a checkpoint on the standby so redo does not start from an older + # point that would include the initial table/page additions. + bravo.safe_psql("checkpoint") + + # Move minRecoveryPoint beyond the previous vacuum with a dummy table. + alpha.safe_psql("create table test2 (a int, b bytea)") + alpha.safe_psql( + "insert into test2 select generate_series(1,10000), " + "sha256(random()::text::bytea)" + ) + alpha.safe_psql("truncate test2") + alpha.wait_for_catchup(bravo) + + # Promote: minRecoveryPoint is reinitialized so WAL replays to the end. + bravo.promote() + + # New page references on the promoted standby before its first checkpoint. 
+ bravo.safe_psql("truncate test1") + bravo.safe_psql("vacuum verbose test1") + bravo.safe_psql("insert into test1 select generate_series(1,1000)") + + # Crash-stop and restart: replay must not see invalid page references. + bravo.stop("immediate") + bravo.start() + + assert ( + bravo.safe_psql("SELECT count(*) FROM test1") == "1000" + ), "Check that table state is correct" diff --git a/src/test/recovery/pyt/test_016_min_consistency.py b/src/test/recovery/pyt/test_016_min_consistency.py new file mode 100644 index 0000000000000..76fa1a4d1e53a --- /dev/null +++ b/src/test/recovery/pyt/test_016_min_consistency.py @@ -0,0 +1,96 @@ +# Copyright (c) 2021-2026, PostgreSQL Global Development Group + +"""Port of src/test/recovery/t/016_min_consistency.pl. + +Offline consistency check of on-disk pages against the control file's minimum +recovery LSN, exercising minRecoveryPoint updates from both the startup process +and the checkpointer. +""" + +import re +import struct + + +def _find_largest_lsn(blocksize, filename): + """Largest page LSN in a relation file, as an integer (hi << 32 | lo). + + The page LSN is stored as two little-endian 4-byte numbers at the start of + each block. + """ + max_hi, max_lo = 0, 0 + with open(filename, "rb") as handle: + while True: + buf = handle.read(blocksize) + if not buf: + break + assert len(buf) == blocksize, "short read from {}".format(filename) + hi, lo = struct.unpack_from("<II", buf) + if hi > max_hi or (hi == max_hi and lo > max_lo): + max_hi, max_lo = hi, lo + return (max_hi << 32) | max_lo + + +def _lsn_to_int(lsn): + hi, lo = lsn.split("/") + return (int(hi, 16) << 32) | int(lo, 16) + + +def test_min_consistency(pg_bin, create_pg): + """minRecoveryPoint is never older than the max page LSN on disk.""" + primary = create_pg("primary", allows_streaming=True, start=False) + # Tiny shared_buffers forces non-startup processes (checkpointer) to flush + # buffers and update minRecoveryPoint; autovacuum off keeps it deterministic.
+ primary.append_conf("shared_buffers = 128kB\nautovacuum = off") + primary.start() + + primary.backup("bkp") + standby = create_pg( + "standby", from_backup=(primary, "bkp"), has_streaming=True, start=False + ) + standby.start() + + primary.safe_psql( + "CREATE TABLE test1 (a int) WITH (fillfactor = 10);\n" + "INSERT INTO test1 SELECT generate_series(1, 10000);" + ) + # Checkpoint + update forces post-checkpoint FPIs the startup process + # replays, updating minRecoveryPoint. + primary.safe_psql("CHECKPOINT;") + primary.safe_psql("UPDATE test1 SET a = a + 1;") + primary.wait_for_catchup(standby) + + # Fill the standby's shared buffers. + standby.safe_psql("SELECT count(*) FROM test1;") + + # This update generates no FPIs, so the startup process won't flush pages. + primary.safe_psql("UPDATE test1 SET a = a + 1;") + + blocksize = int( + primary.safe_psql( + "SELECT setting::int FROM pg_settings WHERE name = 'block_size';" + ) + ) + relfilenode = primary.safe_psql("SELECT pg_relation_filepath('test1'::regclass);") + primary.wait_for_catchup(standby) + + # Restart point on the standby makes the checkpointer update + # minRecoveryPoint. + standby.safe_psql("CHECKPOINT;") + + # Crash the primary so the standby never sees a shutdown checkpoint; the + # standby stops cleanly so its checkpointer records the restart point. 
+ primary.stop("immediate") + standby.stop("fast") + + offline_max_lsn = _find_largest_lsn(blocksize, str(standby.datadir / relfilenode)) + + result = pg_bin.result(["pg_controldata", str(standby.datadir)]) + match = re.search( + r"^Minimum recovery ending location:\s*(.*)$", result.stdout, re.MULTILINE + ) + assert match, "No minRecoveryPoint in control file found" + offline_recovery_lsn = _lsn_to_int(match.group(1).strip()) + + assert ( + offline_recovery_lsn >= offline_max_lsn + ), "table data is consistent with minRecoveryPoint" diff --git a/src/test/recovery/pyt/test_017_shm.py b/src/test/recovery/pyt/test_017_shm.py new file mode 100644 index 0000000000000..7667771cb5525 --- /dev/null +++ b/src/test/recovery/pyt/test_017_shm.py @@ -0,0 +1,91 @@ +# Copyright (c) 2017-2026, PostgreSQL Global Development Group + +"""Port of src/test/recovery/t/017_shm.pl. + +Exercises PostgreSQL's System V shared-memory startup interlock. A foreign shm +segment created with the same key the server derives (the data directory inode) +must not stop the server (it recycles its own key); after a kill9 with a live +backend still holding the shm, a fresh start (and single-user mode) must refuse +with "pre-existing shared memory block" until the orphaned backend is gone. +""" + +import os +import sys +import time + +import pytest + +import pypg +from pypg.sysv_shm import SysVSharedMemory + + +def _poll_start(node): + """Start node, retrying (cf. 
the Perl poll_start helper).""" + for _ in range(10 * pypg.test_timeout_default()): + if node.start(fail_ok=True): + return + node.stop("fast") + time.sleep(0.1) + node.start() + + +@pytest.mark.skipif(sys.platform == "win32", reason="SysV shm unsupported") +def test_017_shm(create_pg, pg_bin): + """A live backend's shared memory blocks restart until that backend exits.""" + gnat = create_pg("gnat", start=False) + gnat_inode = os.stat(gnat.datadir).st_ino + conflict = SysVSharedMemory.create(gnat_inode) + gnat.start() + gnat.restart() # keeps the same shmem key + gnat.kill9() + _poll_start(gnat) # recycles its former shm key + if conflict: + conflict.remove() + gnat.kill9() + _poll_start(gnat) + gnat.stop() + conflict = SysVSharedMemory.create(gnat_inode) + gnat.start() + gnat.stop() + if conflict: + conflict.remove() + gnat.start() + _live_backend_blocks_restart(gnat, pg_bin) + gnat.stop() + + +def _live_backend_blocks_restart(gnat, pg_bin): + regress_shlib = os.environ["REGRESS_SHLIB"] + gnat.safe_psql( + "CREATE FUNCTION wait_pid(int)\n RETURNS void\n AS '{}'\n" + " LANGUAGE C STRICT;".format(regress_shlib) + ) + slow_query = "SELECT wait_pid(pg_backend_pid())" + slow = gnat.background_psql("postgres", on_error_stop=False) + slow.send(slow_query + ";\n") + # The background psql stores the statement with its trailing semicolon in + # pg_stat_activity (the Perl client uses --command, which strips it). 
+ stored_query = slow_query + ";" + assert gnat.poll_query_until( + "SELECT 1 FROM pg_stat_activity WHERE query = '{}'".format(stored_query), "1" + ), "slow query started" + slow_pid = gnat.safe_psql( + "SELECT pid FROM pg_stat_activity WHERE query = '{}'".format(stored_query) + ) + gnat.kill9() + (gnat.datadir / "postmaster.pid").unlink(missing_ok=True) + gnat.rotate_logfile() + pre_existing = r"pre-existing shared memory block" + for _ in range(10 * pypg.test_timeout_default()): + if gnat.start(fail_ok=True) or gnat.log_matches(pre_existing): + break + time.sleep(0.1) + assert gnat.log_matches(pre_existing), "detected live backend via shared memory" + pg_bin.command_fails_like( + ["postgres", "--single", "-D", str(gnat.datadir), "template1"], + pre_existing, + "single-user mode detected live backend via shared memory", + ) + gnat.pg_ctl("kill", "QUIT", slow_pid) + slow.quit() + _poll_start(gnat) diff --git a/src/test/recovery/pyt/test_018_wal_optimize.py b/src/test/recovery/pyt/test_018_wal_optimize.py new file mode 100644 index 0000000000000..c7b2135df7824 --- /dev/null +++ b/src/test/recovery/pyt/test_018_wal_optimize.py @@ -0,0 +1,301 @@ +# Copyright (c) 2017-2026, PostgreSQL Global Development Group + +# pylint: disable=line-too-long +"""Port of src/test/recovery/t/018_wal_optimize.pl. + +When wal_skip_threshold lets a relation created/rewritten in a transaction skip +WAL (file is fsynced at commit instead), an immediate crash must still leave the +data consistent after recovery. The same battery (CREATE/SET TABLESPACE, +TRUNCATE, TRUNCATE+INSERT/COPY, subtransaction SET TABLESPACE patterns, hint +bits, triggers on COPY/TRUNCATE, temp tables) is run twice, once with +wal_level='minimal' and once with 'replica', crashing and recovering after each +step. A final check confirms no orphan relfilenodes remain on disk. 
+""" + +import os +import re + +import pypg + + +def _crash_check(node, sql, query, expected, msg): + """Run sql, crash-restart, then assert query returns expected.""" + node.safe_psql(sql) + node.stop("immediate") + node.start() + assert node.safe_psql(query) == expected, msg + + +def _check_orphan_relfilenodes(node, test_name): + db_oid = node.safe_psql("SELECT oid FROM pg_database WHERE datname = 'postgres'") + prefix = "base/{}/".format(db_oid) + referenced = node.safe_psql( + "SELECT pg_relation_filepath(oid) FROM pg_class\n" + "WHERE reltablespace = 0 AND relpersistence <> 't' AND\n" + "pg_relation_filepath(oid) IS NOT NULL;" + ) + on_disk = sorted( + prefix + name + for name in pypg.slurp_dir(str(node.datadir / prefix)) + if name.isdigit() + ) + want = sorted(referenced.split("\n")) + assert on_disk == want, test_name + + +def _run_wal_optimize(create_pg, wal_level): + node = create_pg("node_{}".format(wal_level), start=False) + node.append_conf( + "\nwal_level = {}\nmax_prepared_transactions = 1\n" + "max_wal_senders = 0\nwal_log_hints = on\nwal_skip_threshold = 0\n".format( + wal_level + ) + ) + node.start() + wl = wal_level + tablespace_dir = os.path.join(node.basedir, "tablespace_other_{}".format(wal_level)) + os.mkdir(tablespace_dir) + _crash_check( + node, + "CREATE TABLE moved (id int);\nINSERT INTO moved VALUES (1);\n" + "CREATE TABLESPACE other LOCATION '{}';\nBEGIN;\n" + "ALTER TABLE moved SET TABLESPACE other;\n" + "CREATE TABLE originated (id int);\nINSERT INTO originated VALUES (1);\n" + "CREATE UNIQUE INDEX ON originated(id) TABLESPACE other;\n" + "COMMIT;".format(tablespace_dir), + "SELECT count(*) FROM moved;", + "1", + "wal_level = {}, CREATE+SET TABLESPACE".format(wl), + ) + assert ( + node.safe_psql( + "INSERT INTO originated VALUES (1) ON CONFLICT (id)\n" + " DO UPDATE set id = originated.id + 1\n RETURNING id;" + ) + == "2" + ), "wal_level = {}, CREATE TABLESPACE, CREATE INDEX".format(wl) + _crash_check( + node, + "BEGIN;\nCREATE TABLE 
trunc (id serial PRIMARY KEY);\n" + "TRUNCATE trunc;\nCOMMIT;", + "SELECT count(*) FROM trunc;", + "0", + "wal_level = {}, TRUNCATE with empty table".format(wl), + ) + _crash_check( + node, + "BEGIN;\nCREATE TABLE trunc_ins (id serial PRIMARY KEY);\n" + "INSERT INTO trunc_ins VALUES (DEFAULT);\nTRUNCATE trunc_ins;\n" + "INSERT INTO trunc_ins VALUES (DEFAULT);\nCOMMIT;", + "SELECT count(*), min(id) FROM trunc_ins;", + "1|2", + "wal_level = {}, TRUNCATE INSERT".format(wl), + ) + _crash_check( + node, + "BEGIN;\nCREATE TABLE twophase (id serial PRIMARY KEY);\n" + "INSERT INTO twophase VALUES (DEFAULT);\nTRUNCATE twophase;\n" + "INSERT INTO twophase VALUES (DEFAULT);\nPREPARE TRANSACTION 't';\n" + "COMMIT PREPARED 't';", + "SELECT count(*), min(id) FROM trunc_ins;", + "1|2", + "wal_level = {}, TRUNCATE INSERT PREPARE".format(wl), + ) + _crash_check( + node, + "SET wal_skip_threshold = '1GB';\nBEGIN;\n" + "CREATE TABLE noskip (id serial PRIMARY KEY);\n" + "INSERT INTO noskip (SELECT FROM generate_series(1, 20000) a) ;\nCOMMIT;", + "SELECT count(*) FROM noskip;", + "20000", + "wal_level = {}, end-of-xact WAL".format(wl), + ) + copy_file = os.path.join(node.basedir, "copy_data_{}.txt".format(wal_level)) + pypg.append_to_file(copy_file, "20000,30000\n20001,30001\n20002,30002") + _copy_battery(node, wl, copy_file) + _trigger_battery(node, wl, copy_file) + node.safe_psql("CREATE TEMP TABLE temp (id serial PRIMARY KEY, id2 text);") + node.stop("immediate") + node.start() + _check_orphan_relfilenodes( + node, "wal_level = {}, no orphan relfilenode remains".format(wl) + ) + + +def _copy_battery(node, wl, copy_file): + _crash_check( + node, + "BEGIN;\nCREATE TABLE ins_trunc (id serial PRIMARY KEY, id2 int);\n" + "INSERT INTO ins_trunc VALUES (DEFAULT, generate_series(1,10000));\n" + "TRUNCATE ins_trunc;\n" + "INSERT INTO ins_trunc (id, id2) VALUES (DEFAULT, 10000);\n" + "COPY ins_trunc FROM '{}' DELIMITER ',';\n" + "INSERT INTO ins_trunc (id, id2) VALUES (DEFAULT, 10000);\n" + 
"COMMIT;".format(copy_file), + "SELECT count(*) FROM ins_trunc;", + "5", + "wal_level = {}, TRUNCATE COPY INSERT".format(wl), + ) + _crash_check( + node, + "BEGIN;\nCREATE TABLE trunc_copy (id serial PRIMARY KEY, id2 int);\n" + "INSERT INTO trunc_copy VALUES (DEFAULT, generate_series(1,3000));\n" + "TRUNCATE trunc_copy;\nCOPY trunc_copy FROM '{}' DELIMITER ',';\n" + "COMMIT;".format(copy_file), + "SELECT count(*) FROM trunc_copy;", + "3", + "wal_level = {}, TRUNCATE COPY".format(wl), + ) + _crash_check( + node, + "BEGIN;\nCREATE TABLE spc_abort (id serial PRIMARY KEY, id2 int);\n" + "INSERT INTO spc_abort VALUES (DEFAULT, generate_series(1,3000));\n" + "TRUNCATE spc_abort;\nSAVEPOINT s;\n" + " ALTER TABLE spc_abort SET TABLESPACE other; ROLLBACK TO s;\n" + "COPY spc_abort FROM '{}' DELIMITER ',';\nCOMMIT;".format(copy_file), + "SELECT count(*) FROM spc_abort;", + "3", + "wal_level = {}, SET TABLESPACE abort subtransaction".format(wl), + ) + _crash_check( + node, + "BEGIN;\nCREATE TABLE spc_commit (id serial PRIMARY KEY, id2 int);\n" + "INSERT INTO spc_commit VALUES (DEFAULT, generate_series(1,3000));\n" + "TRUNCATE spc_commit;\n" + "SAVEPOINT s; ALTER TABLE spc_commit SET TABLESPACE other; RELEASE s;\n" + "COPY spc_commit FROM '{}' DELIMITER ',';\nCOMMIT;".format(copy_file), + "SELECT count(*) FROM spc_commit;", + "3", + "wal_level = {}, SET TABLESPACE commit subtransaction".format(wl), + ) + _crash_check( + node, + "BEGIN;\nCREATE TABLE spc_nest (id serial PRIMARY KEY, id2 int);\n" + "INSERT INTO spc_nest VALUES (DEFAULT, generate_series(1,3000));\n" + "TRUNCATE spc_nest;\nSAVEPOINT s;\n" + "\tALTER TABLE spc_nest SET TABLESPACE other;\n\tSAVEPOINT s2;\n" + "\t\tALTER TABLE spc_nest SET TABLESPACE pg_default;\n\tROLLBACK TO s2;\n" + "\tSAVEPOINT s2;\n\t\tALTER TABLE spc_nest SET TABLESPACE pg_default;\n" + "\tRELEASE s2;\nROLLBACK TO s;\n" + "COPY spc_nest FROM '{}' DELIMITER ',';\nCOMMIT;".format(copy_file), + "SELECT count(*) FROM spc_nest;", + "3", + "wal_level 
= {}, SET TABLESPACE nested subtransaction".format(wl), + ) + _crash_check( + node, + "CREATE TABLE spc_hint (id int);\nINSERT INTO spc_hint VALUES (1);\n" + "BEGIN;\nALTER TABLE spc_hint SET TABLESPACE other;\nCHECKPOINT;\n" + "SELECT * FROM spc_hint;\nINSERT INTO spc_hint VALUES (2);\nCOMMIT;", + "SELECT count(*) FROM spc_hint;", + "2", + "wal_level = {}, SET TABLESPACE, hint bit".format(wl), + ) + + +def _trigger_battery(node, wl, copy_file): + node.safe_psql( + "BEGIN;\nCREATE TABLE idx_hint (c int PRIMARY KEY);\n" + "SAVEPOINT q; INSERT INTO idx_hint VALUES (1); ROLLBACK TO q;\n" + "CHECKPOINT;\nINSERT INTO idx_hint VALUES (1);\n" + "INSERT INTO idx_hint VALUES (2);\nCOMMIT;" + ) + node.stop("immediate") + node.start() + res = node.psql_capture("INSERT INTO idx_hint VALUES (2);") + assert res.rc == 3, "wal_level = {}, unique index LP_DEAD".format(wl) + assert re.search( + r"violates unique", res.stderr + ), "wal_level = {}, unique index LP_DEAD message".format(wl) + _crash_check( + node, + "BEGIN;\nCREATE TABLE upd (id serial PRIMARY KEY, id2 int);\n" + "INSERT INTO upd (id, id2) VALUES (DEFAULT, generate_series(1,10000));\n" + "COPY upd FROM '{}' DELIMITER ',';\nUPDATE upd SET id2 = id2 + 1;\n" + "DELETE FROM upd;\nCOMMIT;".format(copy_file), + "SELECT count(*) FROM upd;", + "0", + "wal_level = {}, UPDATE touches two buffers for one row".format(wl), + ) + _crash_check( + node, + "BEGIN;\nCREATE TABLE ins_copy (id serial PRIMARY KEY, id2 int);\n" + "INSERT INTO ins_copy VALUES (DEFAULT, 1);\n" + "COPY ins_copy FROM '{}' DELIMITER ',';\nCOMMIT;".format(copy_file), + "SELECT count(*) FROM ins_copy;", + "4", + "wal_level = {}, INSERT COPY".format(wl), + ) + _crash_check( + node, + _INS_TRIG_SQL.format(copy_file), + "SELECT count(*) FROM ins_trig;", + "9", + "wal_level = {}, COPY with INSERT triggers".format(wl), + ) + _crash_check( + node, + _TRUNC_TRIG_SQL.format(copy_file), + "SELECT count(*) FROM trunc_trig;", + "4", + "wal_level = {}, TRUNCATE COPY with 
TRUNCATE triggers".format(wl), + ) + + +_INS_TRIG_SQL = """BEGIN; +CREATE TABLE ins_trig (id serial PRIMARY KEY, id2 text); +CREATE FUNCTION ins_trig_before_row_trig() RETURNS trigger + LANGUAGE plpgsql as $$ + BEGIN + IF new.id2 NOT LIKE 'triggered%' THEN + INSERT INTO ins_trig VALUES (DEFAULT, 'triggered row before' || NEW.id2); + END IF; + RETURN NEW; + END; $$; +CREATE FUNCTION ins_trig_after_row_trig() RETURNS trigger + LANGUAGE plpgsql as $$ + BEGIN + IF new.id2 NOT LIKE 'triggered%' THEN + INSERT INTO ins_trig VALUES (DEFAULT, 'triggered row after' || NEW.id2); + END IF; + RETURN NEW; + END; $$; +CREATE TRIGGER ins_trig_before_row_insert + BEFORE INSERT ON ins_trig + FOR EACH ROW EXECUTE PROCEDURE ins_trig_before_row_trig(); +CREATE TRIGGER ins_trig_after_row_insert + AFTER INSERT ON ins_trig + FOR EACH ROW EXECUTE PROCEDURE ins_trig_after_row_trig(); +COPY ins_trig FROM '{}' DELIMITER ','; +COMMIT;""" + +_TRUNC_TRIG_SQL = """BEGIN; +CREATE TABLE trunc_trig (id serial PRIMARY KEY, id2 text); +CREATE FUNCTION trunc_trig_before_stat_trig() RETURNS trigger + LANGUAGE plpgsql as $$ + BEGIN + INSERT INTO trunc_trig VALUES (DEFAULT, 'triggered stat before'); + RETURN NULL; + END; $$; +CREATE FUNCTION trunc_trig_after_stat_trig() RETURNS trigger + LANGUAGE plpgsql as $$ + BEGIN + INSERT INTO trunc_trig VALUES (DEFAULT, 'triggered stat before'); + RETURN NULL; + END; $$; +CREATE TRIGGER trunc_trig_before_stat_truncate + BEFORE TRUNCATE ON trunc_trig + FOR EACH STATEMENT EXECUTE PROCEDURE trunc_trig_before_stat_trig(); +CREATE TRIGGER trunc_trig_after_stat_truncate + AFTER TRUNCATE ON trunc_trig + FOR EACH STATEMENT EXECUTE PROCEDURE trunc_trig_after_stat_trig(); +INSERT INTO trunc_trig VALUES (DEFAULT, 1); +TRUNCATE trunc_trig; +COPY trunc_trig FROM '{}' DELIMITER ','; +COMMIT;""" + + +def test_018_wal_optimize(create_pg): + """WAL-skip optimizations stay crash-consistent at minimal and replica.""" + _run_wal_optimize(create_pg, "minimal") + 
_run_wal_optimize(create_pg, "replica") diff --git a/src/test/recovery/pyt/test_019_replslot_limit.py b/src/test/recovery/pyt/test_019_replslot_limit.py new file mode 100644 index 0000000000000..0b256715d094f --- /dev/null +++ b/src/test/recovery/pyt/test_019_replslot_limit.py @@ -0,0 +1,315 @@ +# Copyright (c) 2020-2026, PostgreSQL Global Development Group + +# pylint: disable=line-too-long +"""Port of src/test/recovery/t/019_replslot_limit.pl. + +Exercises max_slot_wal_keep_size and the replication-slot wal_status lifecycle: +reserved -> extended -> unreserved -> lost as WAL accumulates beyond the limit, +the interaction with wal_keep_size, slot invalidation (logged on both primary and +standby), checkpoint non-blocking, walsender termination to release a slot under +WAL pressure (SIGSTOP/SIGCONT on the sender/receiver), and inactive_since +tracking for physical and logical slots. +""" + +import os +import signal +import time + +import pypg + + +def _slot_status(node, slot, cols="wal_status"): + return node.safe_psql( + "SELECT {} FROM pg_replication_slots WHERE slot_name = '{}'".format(cols, slot) + ) + + +def test_019_replslot_limit(create_pg): + """Slot wal_status transitions and invalidation behave per the WAL limits.""" + _scenario_status_lifecycle(create_pg) + _scenario_checkpoint_not_blocked(create_pg) + if os.name == "nt": + return + _scenario_walsender_termination(create_pg) + _scenario_inactive_since(create_pg) + + +def _scenario_status_lifecycle(create_pg): + primary = create_pg( + "primary", allows_streaming=True, extra=["--wal-segsize=1"], start=False + ) + primary.append_conf( + "\nmin_wal_size = 2MB\nmax_wal_size = 4MB\nlog_checkpoints = yes\n" + ) + primary.start() + primary.safe_psql("SELECT pg_create_physical_replication_slot('rep1')") + assert ( + _slot_status( + primary, + "rep1", + "restart_lsn IS NULL, wal_status is NULL, safe_wal_size is NULL", + ) + == "t|t|t" + ), 'check the state of non-reserved slot is "unknown"' + 
primary.backup("my_backup") + standby = create_pg( + "standby_1", from_backup=(primary, "my_backup"), has_streaming=True, start=False + ) + standby.append_conf("primary_slot_name = 'rep1'") + standby.start() + primary.wait_for_slot_catchup("rep1", "restart", primary.lsn("write")) + standby.stop() + assert ( + _slot_status(primary, "rep1", "wal_status, safe_wal_size IS NULL") + == "reserved|t" + ) + for n in (1, 4): + primary.advance_wal(n) + primary.safe_psql("CHECKPOINT;") + assert ( + _slot_status(primary, "rep1", "wal_status, safe_wal_size IS NULL") + == "reserved|t" + ) + standby.start() + primary.wait_for_slot_catchup("rep1", "restart", primary.lsn("write")) + standby.stop() + primary.append_conf("\nmax_slot_wal_keep_size = 6MB\n") + primary.reload() + assert _slot_status(primary, "rep1") == "reserved", "max_slot_wal_keep_size working" + primary.advance_wal(2) + primary.safe_psql("CHECKPOINT;") + assert _slot_status(primary, "rep1") == "reserved", "slot remains reserved" + _scenario_extended_unreserved_lost(primary, standby) + + +def _scenario_extended_unreserved_lost(primary, standby): + standby.start() + primary.wait_for_slot_catchup("rep1", "restart", primary.lsn("write")) + standby.stop() + primary.safe_psql( + "ALTER SYSTEM SET wal_keep_size to '8MB'; SELECT pg_reload_conf();" + ) + primary.advance_wal(6) + assert _slot_status(primary, "rep1") == "extended", "wal_keep_size overrides limit" + primary.safe_psql("ALTER SYSTEM SET wal_keep_size to 0; SELECT pg_reload_conf();") + standby.start() + primary.wait_for_slot_catchup("rep1", "restart", primary.lsn("write")) + standby.stop() + primary.advance_wal(6) + assert _slot_status(primary, "rep1") == "extended", 'state changes to "extended"' + primary.safe_psql("CHECKPOINT;") + primary.advance_wal(1) + assert ( + _slot_status(primary, "rep1", "wal_status, safe_wal_size <= 0") + == "unreserved|t" + ), 'state "unreserved"' + standby.start() + primary.wait_for_slot_catchup("rep1", "restart", primary.lsn("write")) 
+ standby.stop() + assert not standby.log_matches( + "requested WAL segment [0-9A-F]+ has already been removed" + ), "required WAL segments still available" + _scenario_invalidation(primary, standby) + + +def _scenario_invalidation(primary, standby): + primary.safe_psql("CHECKPOINT;") + primary.safe_psql("ALTER SYSTEM SET max_wal_size='40MB'; SELECT pg_reload_conf()") + logstart = primary.current_log_position() + primary.advance_wal(7) + primary.safe_psql("ALTER SYSTEM RESET max_wal_size; SELECT pg_reload_conf()") + primary.safe_psql("CHECKPOINT;") + assert _wait_log( + primary, 'invalidating obsolete replication slot "rep1"', logstart + ), "slot invalidation logged" + assert ( + primary.safe_psql( + "SELECT slot_name, active, restart_lsn IS NULL, wal_status, safe_wal_size\n" + "FROM pg_replication_slots WHERE slot_name = 'rep1'" + ) + == "rep1|f|t|lost|" + ), 'slot inactive and "lost" persists' + assert _wait_log(primary, "checkpoint complete: ", logstart), "checkpoint ended" + redoseg = primary.safe_psql( + "SELECT pg_walfile_name(lsn) FROM pg_create_physical_replication_slot('s2', true)" + ) + oldestseg = primary.safe_psql( + "SELECT pg_ls_dir AS f FROM pg_ls_dir('pg_wal') WHERE pg_ls_dir ~ " + "'^[0-9A-F]{24}$' ORDER BY 1 LIMIT 1" + ) + primary.safe_psql("SELECT pg_drop_replication_slot('s2')") + assert oldestseg == redoseg, "segments have been removed" + sb_logstart = standby.current_log_position() + standby.start() + assert _wait_log( + standby, + 'This replication slot has been invalidated due to "wal_removed".', + sb_logstart, + ), "replication has been broken" + primary.stop() + standby.stop() + + +def _scenario_checkpoint_not_blocked(create_pg): + primary = create_pg("primary2", allows_streaming=True, start=False) + primary.append_conf( + "\nmin_wal_size = 32MB\nmax_wal_size = 32MB\nlog_checkpoints = yes\n" + ) + primary.start() + primary.safe_psql("SELECT pg_create_physical_replication_slot('rep1')") + primary.backup("my_backup2") + primary.stop() + 
primary.append_conf("\nmax_slot_wal_keep_size = 0\n") + primary.start() + standby = create_pg( + "standby_2", + from_backup=(primary, "my_backup2"), + has_streaming=True, + start=False, + ) + standby.append_conf("primary_slot_name = 'rep1'") + standby.start() + primary.advance_wal(1) + assert ( + primary.safe_psql( + "CHECKPOINT; SELECT 'finished';", timeout=pypg.test_timeout_default() + ) + == "finished" + ), "checkpoint command is not blocked" + primary.stop() + standby.stop() + + +def _scenario_walsender_termination(create_pg): + primary = create_pg( + "primary3", allows_streaming=True, extra=["--wal-segsize=1"], start=False + ) + primary.append_conf( + "\nmin_wal_size = 2MB\nmax_wal_size = 2MB\nlog_checkpoints = yes\n" + "max_slot_wal_keep_size = 1MB\n" + ) + primary.start() + primary.safe_psql("SELECT pg_create_physical_replication_slot('rep3')") + primary.backup("my_backup") + standby = create_pg( + "standby_3", from_backup=(primary, "my_backup"), has_streaming=True, start=False + ) + standby.append_conf("primary_slot_name = 'rep3'") + standby.start() + primary.wait_for_catchup(standby) + senderpid = _wait_single_pid(primary, "walsender") + receiverpid = int( + standby.safe_psql( + "SELECT pid FROM pg_stat_activity WHERE backend_type = 'walreceiver'" + ) + ) + logstart = primary.current_log_position() + os.kill(senderpid, signal.SIGSTOP) + os.kill(receiverpid, signal.SIGSTOP) + primary.advance_wal(2) + assert _wait_log( + primary, + 'terminating process {} to release replication slot "rep3"'.format(senderpid), + logstart, + secs=True, + ), "walsender termination logged" + os.kill(senderpid, signal.SIGCONT) + assert primary.poll_query_until( + "SELECT wal_status FROM pg_replication_slots WHERE slot_name = 'rep3'", "lost" + ), "timed out waiting for slot to be lost" + assert _wait_log( + primary, 'invalidating obsolete replication slot "rep3"', logstart, secs=True + ), "slot invalidation logged" + os.kill(receiverpid, signal.SIGCONT) + primary.stop() + 
standby.stop() + + +def _scenario_inactive_since(create_pg): + primary = create_pg("primary4", allows_streaming="logical", start=False) + primary.start() + primary.backup("my_backup4") + standby = create_pg( + "standby4", from_backup=(primary, "my_backup4"), has_streaming=True, start=False + ) + sb_slot = "sb4_slot" + standby.append_conf("primary_slot_name = '{}'".format(sb_slot)) + creation_time = primary.safe_psql("SELECT current_timestamp;") + primary.safe_psql( + "SELECT pg_create_physical_replication_slot(slot_name := '{}');".format(sb_slot) + ) + inactive_since = primary.validate_slot_inactive_since(sb_slot, creation_time) + standby.start() + primary.wait_for_catchup(standby) + assert ( + _slot_status(primary, sb_slot, "inactive_since IS NULL") == "t" + ), "active physical slot inactive_since is NULL" + standby.stop() + primary.restart() + assert ( + primary.safe_psql( + "SELECT inactive_since > '{}'::timestamptz FROM pg_replication_slots " + "WHERE slot_name = '{}' AND inactive_since IS NOT NULL;".format( + inactive_since, sb_slot + ) + ) + == "t" + ), "inactive physical slot inactive_since updated" + _scenario_inactive_since_logical(create_pg, primary) + + +def _scenario_inactive_since_logical(create_pg, publisher): + subscriber = create_pg("subscriber4", start=False) + subscriber.start() + connstr = publisher.connstr() + " dbname=postgres" + publisher.safe_psql("CREATE PUBLICATION pub FOR ALL TABLES") + creation_time = publisher.safe_psql("SELECT current_timestamp;") + lsub_slot = "lsub4_slot" + publisher.safe_psql( + "SELECT pg_create_logical_replication_slot(slot_name := '{}', " + "plugin := 'pgoutput');".format(lsub_slot) + ) + inactive_since = publisher.validate_slot_inactive_since(lsub_slot, creation_time) + subscriber.safe_psql( + "CREATE SUBSCRIPTION sub CONNECTION '{}' PUBLICATION pub WITH " + "(slot_name = '{}', create_slot = false)".format(connstr, lsub_slot) + ) + subscriber.wait_for_subscription_sync(publisher, "sub") + assert ( + 
_slot_status(publisher, lsub_slot, "inactive_since IS NULL") == "t" + ), "active logical slot inactive_since is NULL" + subscriber.stop() + publisher.restart() + assert ( + publisher.safe_psql( + "SELECT inactive_since > '{}'::timestamptz FROM pg_replication_slots " + "WHERE slot_name = '{}' AND inactive_since IS NOT NULL;".format( + inactive_since, lsub_slot + ) + ) + == "t" + ), "inactive logical slot inactive_since updated" + publisher.stop() + subscriber.stop() + + +def _wait_log(node, pattern, offset, secs=False): + attempts = pypg.test_timeout_default() if secs else 10 * pypg.test_timeout_default() + for _ in range(attempts + 1): + if node.log_matches(pattern, offset): + return True + time.sleep(1 if secs else 0.1) + return False + + +def _wait_single_pid(node, backend_type): + for _ in range(10 * pypg.test_timeout_default() + 1): + pid = node.safe_psql( + "SELECT pid FROM pg_stat_activity WHERE backend_type = '{}'".format( + backend_type + ) + ) + if pid.isdigit(): + return int(pid) + time.sleep(0.1) + raise RuntimeError("could not determine single {} pid".format(backend_type)) diff --git a/src/test/recovery/pyt/test_020_archive_status.py b/src/test/recovery/pyt/test_020_archive_status.py new file mode 100644 index 0000000000000..b95441f87f8ed --- /dev/null +++ b/src/test/recovery/pyt/test_020_archive_status.py @@ -0,0 +1,187 @@ +# Copyright (c) 2021-2026, PostgreSQL Global Development Group + +"""Port of src/test/recovery/t/020_archive_status.pl. + +WAL archiving status (.ready/.done) and recovery behavior with archive_mode +on/always on standbys. 
+""" + +import platform +import re + +import pypg + +windows_os = platform.system() == "Windows" +_BAD_COMMAND = ( + 'copy "%p_does_not_exist" "%f_does_not_exist"' + if windows_os + else 'cp "%p_does_not_exist" "%f_does_not_exist"' +) + + +def _status_paths(segment): + base = "pg_wal/archive_status/{}".format(segment) + return base + ".ready", base + ".done" + + +def test_archive_status(create_pg): + """.ready/.done lifecycle on a primary and on archive_mode on/always standbys.""" + primary = create_pg( + "primary", has_archiving=True, allows_streaming=True, start=False + ) + primary.append_conf("autovacuum = off") + primary.start() + primary_data = primary.datadir + + # Make archiving fail (a working command given a wrong path). + primary.safe_psql( + "ALTER SYSTEM SET archive_command TO '{}';\n" + "SELECT pg_reload_conf();".format(_BAD_COMMAND) + ) + + seg1 = primary.safe_psql("SELECT pg_walfile_name(pg_current_wal_lsn())") + seg1_ready, seg1_done = _status_paths(seg1) + primary.safe_psql( + "CREATE TABLE mine AS SELECT generate_series(1,10) AS x;\n" + "SELECT pg_switch_wal();\nCHECKPOINT;" + ) + + assert primary.poll_query_until( + "SELECT failed_count > 0 FROM pg_stat_archiver" + ), "archiving to fail" + assert (primary_data / seg1_ready).is_file(), ".ready exists for {}".format(seg1) + assert not (primary_data / seg1_done).is_file(), ".done absent for {}".format(seg1) + assert primary.safe_psql( + "SELECT archived_count, last_failed_wal FROM pg_stat_archiver" + ) == "0|{}".format(seg1), "pg_stat_archiver failed to archive {}".format(seg1) + + # Crash, then a cold backup taken while archiving fails (used by standbys). + primary.stop("immediate") + primary.backup_fs_cold("backup") + primary.start() + assert (primary_data / seg1_ready).is_file(), ".ready survives crash recovery" + + # Allow archiving again; wait for success. 
+ primary.safe_psql("ALTER SYSTEM RESET archive_command;\nSELECT pg_reload_conf();") + assert primary.poll_query_until( + "SELECT archived_count FROM pg_stat_archiver", expected="1" + ), "archiving to finish" + assert not (primary_data / seg1_ready).is_file(), ".ready removed for {}".format( + seg1 + ) + assert (primary_data / seg1_done).is_file(), ".done exists for {}".format(seg1) + assert ( + primary.safe_psql("SELECT last_archived_wal FROM pg_stat_archiver") == seg1 + ), "archive success reported for {}".format(seg1) + + seg2 = primary.safe_psql("SELECT pg_walfile_name(pg_current_wal_lsn())") + seg2_ready, seg2_done = _status_paths(seg2) + primary.safe_psql( + "INSERT INTO mine SELECT generate_series(10,20) AS x;\nCHECKPOINT;" + ) + primary_lsn = primary.safe_psql("SELECT pg_switch_wal();") + assert primary.poll_query_until( + "SELECT last_archived_wal FROM pg_stat_archiver", expected=seg2 + ), "archiving to finish" + + _test_standby_on(create_pg, primary, primary_lsn, seg1_ready, seg2_ready, seg2_done) + _test_standby_always( + create_pg, + primary, + primary_lsn, + seg1_ready, + seg2_ready, + seg1_done, + seg2_done, + seg2, + ) + _test_backup_mode(primary) + + +def _test_standby_on( + create_pg, primary, primary_lsn, seg1_ready, seg2_ready, seg2_done +): + standby1 = create_pg( + "standby", from_backup=(primary, "backup"), has_restoring=True, start=False + ) + standby1.append_conf("archive_mode = on") + standby1.start() + data = standby1.datadir + assert standby1.poll_query_until( + "SELECT pg_wal_lsn_diff(pg_last_wal_replay_lsn(), '{}') >= 0".format( + primary_lsn + ) + ), "xlog replay on standby1" + standby1.safe_psql("CHECKPOINT") + + assert not ( + data / seg1_ready + ).is_file(), "inherited .ready removed (archive_mode=on)" + assert not (data / seg2_ready).is_file(), ".ready not created (archive_mode=on)" + assert (data / seg2_done).is_file(), ".done created (archive_mode=on)" + + +def _test_standby_always( + create_pg, primary, primary_lsn, seg1_ready, 
seg2_ready, seg1_done, seg2_done, seg2 +): + standby2 = create_pg( + "standby2", from_backup=(primary, "backup"), has_restoring=True, start=False + ) + standby2.append_conf("archive_mode = always") + standby2.start() + data = standby2.datadir + assert standby2.poll_query_until( + "SELECT pg_wal_lsn_diff(pg_last_wal_replay_lsn(), '{}') >= 0".format( + primary_lsn + ) + ), "xlog replay on standby2" + standby2.safe_psql("CHECKPOINT") + + assert (data / seg1_ready).is_file(), "inherited .ready kept (archive_mode=always)" + assert (data / seg2_ready).is_file(), ".ready created (archive_mode=always)" + + standby2.safe_psql("SELECT pg_stat_reset_shared('archiver')") + + # Crash recovery must not remove non-archived WAL segments. + standby2.stop("immediate") + standby2.start() + assert (data / seg1_ready).is_file(), "WAL still ready to archive after crash" + + standby2.safe_psql("ALTER SYSTEM RESET archive_command;\nSELECT pg_reload_conf();") + assert standby2.poll_query_until( + "SELECT last_archived_wal FROM pg_stat_archiver", expected=seg2 + ), "archiving to finish" + assert ( + standby2.safe_psql("SELECT archived_count FROM pg_stat_archiver") == "2" + ), "correct number of WAL segments archived from standby" + assert ( + not (data / seg1_ready).is_file() and not (data / seg2_ready).is_file() + ), ".ready files removed after archive success (archive_mode=always)" + assert (data / seg1_done).is_file() and ( + data / seg2_done + ).is_file(), ".done files created after archive success (archive_mode=always)" + + # The archiver calls the shell archive module's shutdown callback. 
+ standby2.append_conf("log_min_messages = debug1") + standby2.reload() + standby2.safe_psql("SELECT 1") + log_location = standby2.current_log_position() + standby2.stop() + assert re.search( + r"archiver process shutting down", pypg.slurp_file(standby2.log, log_location) + ), "check shutdown callback of shell archive module" + + +def _test_backup_mode(primary): + # Enter/leave backup mode without crashes; a too-long label fails cleanly. + result = primary.psql_capture( + "SELECT pg_backup_start('onebackup'); " + "SELECT pg_backup_stop();" + "SELECT pg_backup_start(repeat('x', 1026))" + ) + assert result.rc == 3, "psql fails correctly" + assert re.search( + r"backup label too long", result.stderr + ), "pg_backup_start fails gracefully" + primary.safe_psql("SELECT pg_backup_start('onebackup'); SELECT pg_backup_stop();") + primary.safe_psql("SELECT pg_backup_start('twobackup')") diff --git a/src/test/recovery/pyt/test_021_row_visibility.py b/src/test/recovery/pyt/test_021_row_visibility.py new file mode 100644 index 0000000000000..265ecb1ff351c --- /dev/null +++ b/src/test/recovery/pyt/test_021_row_visibility.py @@ -0,0 +1,112 @@ +# Copyright (c) 2017-2026, PostgreSQL Global Development Group + +"""Port of src/test/recovery/t/021_row_visibility.pl. + +Row visibility on a hot standby tracks the primary through streaming: rows are +invisible until their inserting/updating transaction commits and the change is +replayed, uncommitted updates and prepared (two-phase) transactions are +invisible until COMMIT/COMMIT PREPARED, and an aborted prepared transaction +never becomes visible. Driven via interactive psql sessions on both nodes. 
+""" + +import pypg + + +def _send_wait(session, query, pattern): + """Send query to an interactive session and wait for pattern in its output.""" + session.query_until(pattern, query + "\n") + + +def test_021_row_visibility(create_pg): + """A hot standby reflects primary row visibility as transactions resolve.""" + primary = create_pg("primary", allows_streaming=True, start=False) + primary.append_conf("max_prepared_transactions=10") + primary.start() + primary.safe_psql("CREATE TABLE public.test_visibility (data text not null)") + backup_name = "my_backup" + primary.backup(backup_name) + standby = create_pg( + "standby", from_backup=(primary, backup_name), has_streaming=True, start=False + ) + standby.append_conf("max_prepared_transactions=10") + standby.start() + timeout = 2 * pypg.test_timeout_default() + psql_primary = primary.background_psql( + "postgres", + on_error_stop=False, + tuples_only=False, + quiet=False, + timeout=timeout, + ) + psql_standby = standby.background_psql( + "postgres", + on_error_stop=False, + tuples_only=False, + quiet=False, + timeout=timeout, + ) + _send_wait( + psql_standby, + "SELECT * FROM test_visibility ORDER BY data;", + r"(?m)^\(0 rows\)$", + ) + primary.psql_capture("INSERT INTO test_visibility VALUES ('first insert')") + primary.wait_for_catchup(standby) + _send_wait( + psql_standby, + "SELECT * FROM test_visibility ORDER BY data;", + r"(?m)first insert.*\n\(1 row\)", + ) + _send_wait( + psql_primary, + "\nBEGIN;\nUPDATE test_visibility SET data = 'first update' " + "RETURNING data;\n", + r"(?m)^UPDATE 1$", + ) + primary.psql_capture("SELECT txid_current();") + primary.wait_for_catchup(standby) + _send_wait( + psql_standby, + "SELECT * FROM test_visibility ORDER BY data;", + r"(?m)first insert.*\n\(1 row\)", + ) + _send_wait(psql_primary, "COMMIT;", r"(?m)^COMMIT$") + primary.wait_for_catchup(standby) + _send_wait( + psql_standby, + "SELECT * FROM test_visibility ORDER BY data;", + r"(?m)first update\n\(1 row\)$", + ) + 
_send_wait( + psql_primary, + "\nDELETE from test_visibility;\n" + "BEGIN;\n" + "INSERT INTO test_visibility VALUES('inserted in prepared will_commit');\n" + "PREPARE TRANSACTION 'will_commit';", + r"(?m)^PREPARE TRANSACTION$", + ) + _send_wait( + psql_primary, + "\nBEGIN;\n" + "INSERT INTO test_visibility VALUES('inserted in prepared will_abort');\n" + "PREPARE TRANSACTION 'will_abort';\n", + r"(?m)^PREPARE TRANSACTION$", + ) + primary.wait_for_catchup(standby) + _send_wait( + psql_standby, + "SELECT * FROM test_visibility ORDER BY data;", + r"(?m)^\(0 rows\)$", + ) + primary.safe_psql("COMMIT PREPARED 'will_commit';") + primary.safe_psql("ROLLBACK PREPARED 'will_abort';") + primary.wait_for_catchup(standby) + _send_wait( + psql_standby, + "SELECT * FROM test_visibility ORDER BY data;", + r"(?m)will_commit.*\n\(1 row\)$", + ) + psql_primary.quit() + psql_standby.quit() + primary.stop() + standby.stop() diff --git a/src/test/recovery/pyt/test_022_crash_temp_files.py b/src/test/recovery/pyt/test_022_crash_temp_files.py new file mode 100644 index 0000000000000..280157936ccd9 --- /dev/null +++ b/src/test/recovery/pyt/test_022_crash_temp_files.py @@ -0,0 +1,92 @@ +# Copyright (c) 2021-2026, PostgreSQL Global Development Group + +"""Port of src/test/recovery/t/022_crash_temp_files.pl. + +When a backend is SIGKILLed mid-INSERT (after spilling a temp file) and the +server crash-restarts, remove_temp_files_after_crash governs cleanup: with it +on, base/pgsql_tmp is empty after recovery; with it off, the orphaned temp file +survives the crash recovery and is only cleared by a later clean restart. A +second session blocked on a unique-index lock is used to guarantee the victim +backend has spilled its temp file before the kill. 
+""" + +import sys + +import pytest + +import pypg + +_CRASH_ERR = ( + r"WARNING: terminating connection because of crash of another server " + r"process|server closed the connection unexpectedly|connection to server " + r"was lost|could not send data to server" +) + + +def _spill_and_kill(node, killme, killme2, table): + """Set up the two sessions, spill a temp file, SIGKILL the victim.""" + pid = killme.query_until(r"[0-9]+[\r\n]", "SELECT pg_backend_pid();\n").strip() + killme2.query_until( + r"insert-tuple-to-lock-next-insert", + "BEGIN;\nINSERT INTO {} (a) VALUES(1);\n" + "SELECT $$insert-tuple-to-lock-next-insert$$;\n".format(table), + ) + killme.query_until( + r"in-progress-before-sigkill", + "BEGIN;\nSELECT $$in-progress-before-sigkill$$;\n" + "INSERT INTO {} (a) SELECT i FROM generate_series(1, 5000) s(i);\n".format( + table + ), + ) + killme2.query_until( + r"insert-tuple-lock-waiting", + "DO $c$\nDECLARE\n c INT;\nBEGIN\n LOOP\n" + " SELECT COUNT(*) INTO c FROM pg_locks WHERE pid = {} " + "AND NOT granted;\n" + " IF c > 0 THEN\n EXIT;\n END IF;\n END LOOP;\nEND; $c$;\n" + "SELECT $$insert-tuple-lock-waiting$$;\n".format(pid), + ) + node.pg_ctl("kill", "KILL", pid) + killme.quit() + killme2.wait_for_stderr(_CRASH_ERR, "SELECT pg_sleep({});\n".format(_timeout())) + killme2.quit() + assert node.poll_query_until("", ""), "server crash-recovered" + + +def _timeout(): + return pypg.test_timeout_default() + + +@pytest.mark.skipif(sys.platform == "win32", reason="tests hang on Windows") +def test_022_crash_temp_files(create_pg): + """remove_temp_files_after_crash controls temp-file cleanup across a crash.""" + node = create_pg("node_crash") + node.safe_psql( + "ALTER SYSTEM SET remove_temp_files_after_crash = on;\n" + "ALTER SYSTEM SET log_connections = receipt;\n" + "ALTER SYSTEM SET work_mem = '64kB';\n" + "ALTER SYSTEM SET restart_after_crash = on;\n" + "SELECT pg_reload_conf();" + ) + node.safe_psql("CREATE TABLE tab_crash (a integer UNIQUE);") + killme = 
node.background_psql("postgres", on_error_stop=True) + killme2 = node.background_psql("postgres", on_error_stop=True) + _spill_and_kill(node, killme, killme2, "tab_crash") + assert ( + node.safe_psql("SELECT COUNT(1) FROM pg_ls_dir($$base/pgsql_tmp$$)") == "0" + ), "no temporary files" + node.safe_psql( + "ALTER SYSTEM SET remove_temp_files_after_crash = off;\n" + "SELECT pg_reload_conf();" + ) + killme = node.background_psql("postgres", on_error_stop=True) + killme2 = node.background_psql("postgres", on_error_stop=True) + _spill_and_kill(node, killme, killme2, "tab_crash") + assert ( + node.safe_psql("SELECT COUNT(1) FROM pg_ls_dir($$base/pgsql_tmp$$)") == "1" + ), "one temporary file" + node.restart() + assert ( + node.safe_psql("SELECT COUNT(1) FROM pg_ls_dir($$base/pgsql_tmp$$)") == "0" + ), "temporary file was removed" + node.stop() diff --git a/src/test/recovery/pyt/test_023_pitr_prepared_xact.py b/src/test/recovery/pyt/test_023_pitr_prepared_xact.py new file mode 100644 index 0000000000000..0801d263711b9 --- /dev/null +++ b/src/test/recovery/pyt/test_023_pitr_prepared_xact.py @@ -0,0 +1,63 @@ +# Copyright (c) 2021-2026, PostgreSQL Global Development Group + +"""Port of src/test/recovery/t/023_pitr_prepared_xact.pl. + +Point-in-time recovery (PITR) with prepared transactions. +""" + + +def test_pitr_prepared_xact(create_pg): + """A PITR target just after PREPARE leaves the 2PC xact to be committed.""" + primary = create_pg( + "primary", has_archiving=True, allows_streaming=True, start=False + ) + primary.append_conf("max_prepared_transactions = 10") + primary.start() + + backup_name = "my_backup" + primary.backup(backup_name) + + # Target a restore point just after PREPARE TRANSACTION, so the promoted + # node still needs an explicit COMMIT PREPARED. 
+ node_pitr = create_pg( + "node_pitr", + from_backup=(primary, backup_name), + standby=False, + has_restoring=True, + start=False, + ) + node_pitr.append_conf( + "recovery_target_name = 'rp'\nrecovery_target_action = 'promote'" + ) + + primary.psql_capture( + "CREATE TABLE foo(i int);\n" + "BEGIN;\n" + "INSERT INTO foo VALUES(1);\n" + "PREPARE TRANSACTION 'fooinsert';\n" + "SELECT pg_create_restore_point('rp');\n" + "INSERT INTO foo VALUES(2);\n" + ) + + walfile = primary.safe_psql("SELECT pg_walfile_name(pg_current_wal_lsn());") + primary.safe_psql("SELECT pg_switch_wal()") + assert primary.poll_query_until( + "SELECT '{}' <= last_archived_wal FROM pg_stat_archiver;".format(walfile) + ), "WAL segment archived" + + node_pitr.start() + assert node_pitr.poll_query_until( + "SELECT pg_is_in_recovery() = 'f';" + ), "PITR node exited recovery" + + # Only the prepared-transaction row should be present; the INSERT after the + # restore point is past the recovery target. + node_pitr.psql_capture("COMMIT PREPARED 'fooinsert';") + assert ( + node_pitr.safe_psql("SELECT * FROM foo;") == "1" + ), "check table contents after COMMIT PREPARED" + + # New data on the post-promotion timeline must survive an immediate restart. + node_pitr.psql_capture("INSERT INTO foo VALUES(3);\nCHECKPOINT;\n") + node_pitr.stop("immediate") + node_pitr.start() diff --git a/src/test/recovery/pyt/test_024_archive_recovery.py b/src/test/recovery/pyt/test_024_archive_recovery.py new file mode 100644 index 0000000000000..7a0462e04e9fc --- /dev/null +++ b/src/test/recovery/pyt/test_024_archive_recovery.py @@ -0,0 +1,81 @@ +# Copyright (c) 2021-2026, PostgreSQL Global Development Group + +"""Port of src/test/recovery/t/024_archive_recovery.pl. + +Archive recovery of WAL generated with wal_level=minimal must fail. 
+""" + +import re +import time + +from pypg import slurp_file + +_REPLICA_CONFIG = ( + "wal_level = replica\n" + "archive_mode = on\n" + "max_wal_senders = 10\n" + "hot_standby = off" +) + + +def _wait_postmaster_gone(node): + pidfile = node.datadir / "postmaster.pid" + for _ in range(10 * 180): + if not pidfile.exists(): + return + time.sleep(0.1) + + +def test_archive_recovery(create_pg, pg_bin): + """Recovery (and standby start) FATALs on a wal_level=minimal record.""" + node = create_pg("orig", has_archiving=True, allows_streaming=True, start=False) + node.append_conf(_REPLICA_CONFIG) + node.start() + + backup_name = "my_backup" + node.backup(backup_name) + + # Generate WAL with wal_level=minimal (archiving off, so not archived yet). + node.append_conf("wal_level = minimal\narchive_mode = off\nmax_wal_senders = 0") + node.restart() + + # Switch back to replica/archiving so the wal_level-change record gets + # archived. + node.append_conf(_REPLICA_CONFIG) + node.restart() + + walfile = node.safe_psql("SELECT pg_walfile_name(pg_current_wal_lsn());") + node.safe_psql("SELECT pg_switch_wal()") + assert node.poll_query_until( + "SELECT '{}' <= last_archived_wal FROM pg_stat_archiver;".format(walfile) + ), "WAL segment archived" + node.stop() + + def check_recovery(node_name, node_text, standby_setting): + recovery_node = create_pg( + node_name, + from_backup=(node, backup_name), + has_restoring=True, + standby=standby_setting, + start=False, + ) + # pg_ctl directly (not start) because recovery is expected to fail. 
+ pg_bin.result( + [ + "pg_ctl", + "--pgdata", + str(recovery_node.datadir), + "--log", + str(recovery_node.log), + "start", + ] + ) + _wait_postmaster_gone(recovery_node) + assert re.search( + r'FATAL: .* WAL was generated with "wal_level=minimal", ' + r"cannot continue recovering", + slurp_file(recovery_node.log), + ), "{} ends with an error on wal_level=minimal WAL".format(node_text) + + check_recovery("archive_recovery", "archive recovery", False) + check_recovery("standby", "standby", True) diff --git a/src/test/recovery/pyt/test_025_stuck_on_old_timeline.py b/src/test/recovery/pyt/test_025_stuck_on_old_timeline.py new file mode 100644 index 0000000000000..932bbca481fc3 --- /dev/null +++ b/src/test/recovery/pyt/test_025_stuck_on_old_timeline.py @@ -0,0 +1,56 @@ +# Copyright (c) 2021-2026, PostgreSQL Global Development Group + +"""Port of src/test/recovery/t/025_stuck_on_old_timeline.pl. + +A cascading standby that restores WAL from the primary's archive (and streams +from an intermediate standby) must follow a timeline switch when the +intermediate standby is promoted: after promotion and a WAL switch, new content +written on the promoted node replicates through to the cascade standby. 
def test_025_stuck_on_old_timeline(create_pg):
    """A cascade standby follows the timeline switch via archive + streaming.

    Three nodes are built: ``primary``, ``standby`` (from a backup of the
    primary) and ``cascade`` (from a backup of the standby, with restoring
    enabled against the primary). The standby is promoted and a WAL switch is
    archived before the cascade is started; a table created afterwards on the
    promoted standby must be readable on the cascade.
    """
    backup_label = "my_backup"

    primary = create_pg(
        "primary", allows_streaming=True, has_archiving=True, start=False
    )
    primary.append_conf("\nwal_keep_size=128MB\n")
    primary.start()
    primary.backup(backup_label)

    standby = create_pg(
        "standby",
        from_backup=(primary, backup_label),
        allows_streaming=True,
        has_streaming=True,
        has_archiving=True,
        start=False,
    )
    standby.start()
    # The backup for the cascade is taken from the standby with -Xnone.
    standby.backup(backup_label, backup_options=["-Xnone"])

    cascade = create_pg(
        "cascade",
        from_backup=(standby, backup_label),
        has_streaming=True,
        start=False,
    )
    cascade.enable_restoring(primary)
    cascade.append_conf("\nrecovery_target_timeline='latest'\n")

    standby.promote()
    promoted = standby.poll_query_until("SELECT NOT pg_is_in_recovery();")
    assert promoted, "Timed out while waiting for promotion"

    # Remember the segment in use, force a switch, and wait for its archival.
    pending_segment = standby.safe_psql(
        "SELECT pg_walfile_name(pg_current_wal_lsn());"
    )
    standby.safe_psql("SELECT pg_switch_wal()")
    archived_query = "SELECT '{}' <= last_archived_wal FROM pg_stat_archiver".format(
        pending_segment
    )
    assert standby.poll_query_until(
        archived_query
    ), "Timed out while waiting for WAL segment to be archived"

    cascade.start()
    standby.safe_psql("CREATE TABLE tab_int AS SELECT 1 AS a")
    standby.wait_for_catchup(cascade)
    row_count = cascade.safe_psql("SELECT count(*) FROM tab_int")
    assert row_count == "1", "check streamed content on cascade standby"

    for running in (cascade, standby, primary):
        running.stop()
import re

from pypg import slurp_file

# PL/pgSQL block that keeps inserting into "filler" until fewer than two
# blocks' worth of bytes remain in the current WAL segment, then exits.
_FILL_WAL = """\
DO $$
DECLARE
    wal_segsize int := setting::int FROM pg_settings WHERE name = 'wal_segment_size';
    remain int;
    iters int := 0;
BEGIN
    LOOP
    INSERT into filler
    select g, repeat(encode(sha256(g::text::bytea), 'hex'), (random() * 15 + 1)::int)
    from generate_series(1, 10) g;

    remain := wal_segsize - (pg_current_wal_insert_lsn() - '0/0') % wal_segsize;
    IF remain < 2 * setting::int from pg_settings where name = 'block_size' THEN
        RAISE log 'exiting after % iterations, % bytes to end of WAL segment', iters, remain;
        EXIT;
    END IF;
    iters := iters + 1;
    END LOOP;
END
$$;
"""


def test_overwrite_contrecord(create_pg):
    """A standby replays past an overwritten contrecord and promotes.

    The primary fills its current WAL segment, emits one large logical message
    that spills into the next segment, and is stopped immediately. The second
    segment file is deleted, a cold filesystem backup seeds the standby, and
    both nodes are started. The standby must replay new primary activity and
    log that it skipped the missing contrecord.
    """
    segment_query = "SELECT pg_walfile_name(pg_current_wal_insert_lsn())"

    primary = create_pg("primary", allows_streaming=True, start=False)
    primary.append_conf("autovacuum = off\nwal_keep_size = 1GB")
    primary.start()

    primary.safe_psql("create table filler (a int, b text)")
    primary.safe_psql(_FILL_WAL)

    initfile = primary.safe_psql(segment_query)
    primary.safe_psql(
        "SELECT pg_logical_emit_message(true, 'test 026', repeat('xyzxz', 123456))"
    )
    endfile = primary.safe_psql(segment_query)
    assert initfile != endfile, "{} differs from {}".format(initfile, endfile)

    # Immediate stop (no shutdown checkpoint), then drop the tail segment so
    # the large message gets overwritten with new contents on startup.
    primary.stop("immediate")
    tail_segment = primary.datadir / "pg_wal" / endfile
    tail_segment.unlink()

    primary.backup_fs_cold("backup")
    replica = create_pg(
        "standby", from_backup=(primary, "backup"), has_streaming=True, start=False
    )
    replica.start()
    primary.start()

    primary.safe_psql("create table foo (a text); insert into foo values ('hello')")
    primary.safe_psql("SELECT pg_logical_emit_message(true, 'test 026', 'AABBCC')")

    target_lsn = primary.safe_psql("SELECT pg_current_wal_lsn()")
    replay_query = "SELECT '{}'::pg_lsn <= pg_last_wal_replay_lsn()".format(
        target_lsn
    )
    assert replica.poll_query_until(replay_query), "standby caught up"

    replayed_row = replica.safe_psql("select * from foo")
    assert replayed_row == "hello", "standby replays past overwritten contrecord"

    replica_log = slurp_file(replica.log)
    assert re.search(
        r"successfully skipped missing contrecord at", replica_log
    ), "found log line in standby"

    replica.promote()
    primary.stop()
    replica.stop()
import os

import pypg


def test_027_stream_regress(create_pg, pg_bin):
    """Regression suite replicates faithfully; primary/standby dumps match.

    Runs ``$PG_REGRESS`` with the parallel schedule against a streaming
    primary, then compares ``pg_dumpall`` output of primary and standby, then
    (with autovacuum off on the primary) ``pg_dump`` of ``pg_catalog`` in the
    ``regression`` database, and finally checks which statement kinds
    pg_stat_statements recorded on the primary.
    """
    primary = create_pg("primary", allows_streaming=True, start=False)
    primary.adjust_conf("max_connections", "25")
    for setting in (
        "max_prepared_transactions = 10",
        "shared_preload_libraries = 'pg_stat_statements'\n"
        "pg_stat_statements.max = 50000\ncompute_query_id = 'regress'\n",
        "synchronize_seqscans = off",
    ):
        primary.append_conf(setting)
    requested_extras = os.environ.get("PG_TEST_EXTRA", "").split()
    if "wal_consistency_checking" in requested_extras:
        primary.append_conf("wal_consistency_checking = all")
    primary.start()

    slot_result = primary.psql_capture(
        "SELECT pg_create_physical_replication_slot('standby_1');"
    )
    assert slot_result.rc == 0, "physical slot created on primary"

    backup_label = "my_backup"
    primary.backup(backup_label)
    standby = create_pg(
        "standby_1",
        from_backup=(primary, backup_label),
        has_streaming=True,
        start=False,
    )
    standby.append_conf("primary_slot_name = standby_1")
    standby.append_conf("max_standby_streaming_delay = 600s")
    standby.start()

    # Assemble the pg_regress command line from the environment.
    dlpath = os.path.dirname(os.environ["REGRESS_SHLIB"])
    outputdir = str(_tmp_check())
    regress_in = os.path.join(os.path.dirname(__file__), "..", "..", "regress")
    extra_opts = os.environ.get("EXTRA_REGRESS_OPTS", "")
    regress_cmd = [os.environ["PG_REGRESS"]]
    regress_cmd.extend(extra_opts.split())
    regress_cmd.extend(
        [
            "--dlpath=" + dlpath,
            "--bindir=",
            "--host=" + str(primary.host),
            "--port=" + str(primary.port),
            "--schedule=" + os.path.join(regress_in, "parallel_schedule"),
            "--max-concurrent-tests=20",
            "--inputdir=" + regress_in,
            "--outputdir=" + outputdir,
        ]
    )
    pg_bin.command_ok(regress_cmd, "regression tests pass")
    assert primary.is_alive(), "primary alive after regression test run"
    assert standby.is_alive(), "standby alive after regression test run"

    primary.psql_capture(
        "select setval(seqrelid, nextval(seqrelid)) from pg_sequence",
        dbname="regression",
    )
    primary.wait_for_replay_catchup(standby)

    def connection_args(server):
        # --host/--port arguments addressing the given node.
        return ["--host", str(server.host), "--port", str(server.port)]

    # Full cluster dumps; unlogged table data is excluded on the primary side.
    primary_dump = outputdir + "/primary.dump"
    standby_dump = outputdir + "/standby.dump"
    dumpall_common = ["--no-sync", "--no-statistics", "--restrict-key", "test"]
    pg_bin.command_ok(
        ["pg_dumpall", "--file", primary_dump]
        + dumpall_common
        + connection_args(primary)
        + ["--no-unlogged-table-data"],
        "dump primary server",
    )
    pg_bin.command_ok(
        ["pg_dumpall", "--file", standby_dump]
        + dumpall_common
        + connection_args(standby),
        "dump standby server",
    )
    pypg.compare_files(
        primary_dump, standby_dump, "compare primary and standby dumps"
    )

    # Catalog dumps are taken after restarting the primary with autovacuum off.
    primary.append_conf("autovacuum = off")
    primary.restart()
    primary.wait_for_replay_catchup(standby)

    primary_catalogs = outputdir + "/catalogs_primary.dump"
    standby_catalogs = outputdir + "/catalogs_standby.dump"
    catalog_common = ["--no-sync", "--restrict-key", "test"]
    pg_bin.command_ok(
        ["pg_dump", "--schema", "pg_catalog", "--file", primary_catalogs]
        + catalog_common
        + connection_args(primary)
        + ["--no-unlogged-table-data", "regression"],
        "dump catalogs of primary server",
    )
    pg_bin.command_ok(
        ["pg_dump", "--schema", "pg_catalog", "--file", standby_catalogs]
        + catalog_common
        + connection_args(standby)
        + ["regression"],
        "dump catalogs of standby server",
    )
    pypg.compare_files(
        primary_catalogs,
        standby_catalogs,
        "compare primary and standby catalog dumps",
    )

    primary.safe_psql("CREATE EXTENSION pg_stat_statements")
    statement_kinds = primary.safe_psql(
        "WITH select_stats AS\n"
        " (SELECT upper(substr(query, 1, 6)) AS select_query\n"
        " FROM pg_stat_statements\n"
        " WHERE upper(substr(query, 1, 6)) IN ('SELECT', 'UPDATE',\n"
        " 'INSERT', 'DELETE',\n"
        " 'CREATE'))\n"
        " SELECT select_query, count(select_query) > 1 AS some_rows\n"
        " FROM select_stats\n"
        " GROUP BY select_query ORDER BY select_query;"
    )
    expected_kinds = "CREATE|t\nDELETE|t\nINSERT|t\nSELECT|t\nUPDATE|t"
    assert (
        statement_kinds == expected_kinds
    ), "check contents of pg_stat_statements on regression database"

    standby.stop()
    primary.stop()


def _tmp_check():
    """Create and return a fresh temporary directory for regression output."""
    import tempfile  # pylint: disable=import-outside-toplevel

    created = tempfile.mkdtemp(prefix="streamregress_")
    return created


# ---------------------------------------------------------------------------
# src/test/recovery/pyt/test_028_pitr_timelines.py
#
# Port of src/test/recovery/t/028_pitr_timelines.pl.
#
# PITR to a target physically located in a WAL segment with a higher TLI than
# the target point's TLI: recovery finds the WAL but does not follow the
# timeline switch, creating a TLI 1 -> 3 end-of-recovery record.
# ---------------------------------------------------------------------------


def test_pitr_timelines(create_pg):
    """PITR across a segment that also holds a later timeline's switch.

    A standby with ``archive_mode = always`` replays two inserts and a restore
    point from the primary, is promoted, and archives the segment on the new
    timeline. A first node recovers from the original backup to the restore
    point and adds a row; a second node recovering from the same backup must
    then see that row.
    """
    backup_label = "my_backup"
    max_query = "SELECT max(i) FROM foo;"
    out_of_recovery = "SELECT pg_is_in_recovery() = 'f';"

    primary = create_pg("primary", has_archiving=True, allows_streaming=True)
    primary.backup(backup_label)

    primary.psql_capture(
        "CREATE TABLE foo(i int);\n"
        "INSERT INTO foo VALUES(1);\n"
        "SELECT pg_create_restore_point('rp');\n"
        "INSERT INTO foo VALUES(2);"
    )

    standby = create_pg(
        "standby",
        from_backup=(primary, backup_label),
        has_streaming=True,
        has_archiving=True,
        start=False,
    )
    standby.append_conf("archive_mode = always")
    standby.start()
    primary.wait_for_catchup(standby)
    replicated_max = standby.safe_psql(max_query)
    assert replicated_max == "2", "check table contents after archive recovery"

    # Kill the primary before it archives the segment holding the INSERTs.
    primary.stop("immediate")

    # Promote, then switch WAL so that segment is archived on a new timeline.
    standby.promote()
    standby.safe_psql("SELECT pg_walfile_name(pg_current_wal_lsn());")
    standby.safe_psql("SELECT pg_switch_wal()")
    standby.stop()

    # First PITR: recover to the restore point. The WAL is found in the TLI-2
    # segment, but the timeline switch is not followed.
    node_pitr = create_pg(
        "node_pitr",
        from_backup=(primary, backup_label),
        standby=False,
        has_restoring=True,
        start=False,
    )
    node_pitr.append_conf(
        "recovery_target_name = 'rp'\nrecovery_target_action = 'promote'"
    )
    node_pitr.start()
    assert node_pitr.poll_query_until(out_of_recovery), "PITR promotion finished"
    pitr_max = node_pitr.safe_psql(max_query)
    assert pitr_max == "1", "check table contents after point-in-time recovery"

    node_pitr.safe_psql("INSERT INTO foo VALUES(3);")
    # Wait for the archiver process to exist before stopping, so the archive
    # completes.
    archiver_up = node_pitr.poll_query_until(
        "SELECT true FROM pg_stat_activity WHERE backend_type = 'archiver';"
    )
    assert archiver_up, "archiver started"
    node_pitr.stop()

    # Second recovery: archive recovery on the PITR-created timeline replays
    # the TLI 1 -> 3 end-of-recovery record.
    node_pitr2 = create_pg(
        "node_pitr2",
        from_backup=(primary, backup_label),
        standby=False,
        has_restoring=True,
        start=False,
    )
    node_pitr2.append_conf("recovery_target_action = 'promote'")
    node_pitr2.start()
    assert node_pitr2.poll_query_until(out_of_recovery), "PITR promotion finished"
    final_max = node_pitr2.safe_psql(max_query)
    assert final_max == "3", "check table contents after point-in-time recovery"
+ +Cumulative statistics survive a clean restart (the stats file is written at +shutdown and reloaded) but are discarded after an immediate (crash) shutdown or +when the stats file is corrupted/truncated. Also checks that pg_stat_reset_shared +for checkpointer and wal resets the right counters and bumps stats_reset, and +that those resets persist across restarts. +""" + +import os +import shutil +import tempfile + +CONNECT_DB = "postgres" +TEST_DB = "test" + + +def _have_stats(node, kind, dboid, objid): + return node.safe_psql( + "SELECT pg_stat_have_stats('{}', {}, {})".format(kind, dboid, objid) + ) + + +def _trigger_funcrel_stat(node): + node.safe_psql( + "SELECT * FROM tab_stats_crash_discard_test1;\n" + "SELECT func_stats_crash_discard1();\nSELECT pg_stat_force_next_flush();", + dbname=TEST_DB, + ) + + +def _checkpoint_stats(node): + return { + "count": int( + node.safe_psql("SELECT num_timed + num_requested FROM pg_stat_checkpointer") + ), + "reset": node.safe_psql("SELECT stats_reset FROM pg_stat_checkpointer"), + } + + +def _wal_stats(node): + return { + "records": int(node.safe_psql("SELECT wal_records FROM pg_stat_wal")), + "bytes": int(node.safe_psql("SELECT wal_bytes FROM pg_stat_wal")), + "reset": node.safe_psql("SELECT stats_reset FROM pg_stat_wal"), + } + + +def _io_stats(node, context, obj, backend_type): + where = "context = '{}' AND object = '{}' AND backend_type = '{}'".format( + context, obj, backend_type + ) + writes = node.safe_psql("SELECT writes FROM pg_stat_io WHERE " + where) + reads = node.safe_psql("SELECT reads FROM pg_stat_io WHERE " + where) + return { + "writes": int(writes) if writes else 0, + "reads": int(reads) if reads else 0, + } + + +def test_029_stats_restart(create_pg): + """Stats persist across clean restart, vanish on crash/corruption, reset OK.""" + node = create_pg("primary", allows_streaming=True, start=False) + node.append_conf("track_functions = 'all'") + node.start() + standalone = _io_stats(node, "init", "wal", 
"standalone backend") + startup = _io_stats(node, "normal", "wal", "startup") + assert standalone["writes"] > 0, "increased standalone backend IO writes" + assert startup["reads"] > 0, "increased startup IO reads" + node.safe_psql("CREATE DATABASE {}".format(TEST_DB)) + node.safe_psql( + "CREATE TABLE tab_stats_crash_discard_test1 AS " + "SELECT generate_series(1,100) AS a", + dbname=TEST_DB, + ) + node.safe_psql( + "CREATE FUNCTION func_stats_crash_discard1() RETURNS VOID AS " + "'select 2;' LANGUAGE SQL IMMUTABLE", + dbname=TEST_DB, + ) + dboid = node.safe_psql( + "SELECT oid FROM pg_database WHERE datname = '{}'".format(TEST_DB), + dbname=TEST_DB, + ) + funcoid = node.safe_psql( + "SELECT 'func_stats_crash_discard1()'::regprocedure::oid", dbname=TEST_DB + ) + tableoid = node.safe_psql( + "SELECT 'tab_stats_crash_discard_test1'::regclass::oid", dbname=TEST_DB + ) + _trigger_funcrel_stat(node) + for kind, objid in ( + ("database", "0"), + ("function", funcoid), + ("relation", tableoid), + ): + assert ( + _have_stats(node, kind, dboid, objid) == "t" + ), "initial: {} stats exist".format(kind) + _stats_file_cycles(node, dboid, funcoid, tableoid) + _reset_cycles(node) + + +def _expect_stats(node, dboid, funcoid, tableoid, present, sect): + for kind, objid in ( + ("database", "0"), + ("function", funcoid), + ("relation", tableoid), + ): + assert ( + _have_stats(node, kind, dboid, objid) == present + ), "{}: {} stats {}".format(sect, kind, "exist" if present == "t" else "absent") + + +def _stats_file_cycles(node, dboid, funcoid, tableoid): + node.stop() + statsfile = tempfile.mktemp(prefix="discard_stats1_") + og_stats = node.datadir / "pg_stat" / "pgstat.stat" + assert og_stats.is_file(), "origin stats file must exist" + shutil.copy(og_stats, statsfile) + node.start() + _expect_stats(node, dboid, funcoid, tableoid, "t", "copy") + node.stop("immediate") + assert not og_stats.exists(), "no stats file after immediate shutdown" + shutil.copy(statsfile, og_stats) + 
node.start() + _expect_stats(node, dboid, funcoid, tableoid, "f", "post immediate") + os.unlink(statsfile) + _trigger_funcrel_stat(node) + _expect_stats(node, dboid, funcoid, tableoid, "t", "post immediate, new") + node.stop() + with open(og_stats, "w", encoding="utf-8") as fh: + fh.write("ZZZZZZZZZZZZZ") + node.start() + _expect_stats(node, dboid, funcoid, tableoid, "f", "invalid_overwrite") + _trigger_funcrel_stat(node) + node.stop() + with open(og_stats, "a", encoding="utf-8") as fh: + fh.write("XYZ") + node.start() + _expect_stats(node, dboid, funcoid, tableoid, "f", "invalid_append") + + +def _reset_cycles(node): + node.safe_psql("CHECKPOINT; CHECKPOINT;") + ckpt0 = _checkpoint_stats(node) + wal0 = _wal_stats(node) + node.restart() + ckpt1 = _checkpoint_stats(node) + wal1 = _wal_stats(node) + assert ckpt0["count"] < ckpt1["count"], "post restart: increased checkpoint count" + assert wal0["records"] < wal1["records"], "post restart: increased wal records" + assert wal0["bytes"] < wal1["bytes"], "post restart: increased wal bytes" + assert ( + ckpt0["reset"] == ckpt1["reset"] + ), "post restart: checkpoint stats_reset equal" + assert wal0["reset"] == wal1["reset"], "post restart: wal stats_reset equal" + node.safe_psql("SELECT pg_stat_reset_shared('checkpointer')") + ckpt2 = _checkpoint_stats(node) + wal2 = _wal_stats(node) + assert ckpt1["count"] > ckpt2["count"], "post ckpt reset: checkpoint count smaller" + assert ckpt0["reset"] < ckpt2["reset"], "post ckpt reset: stats_reset newer" + assert wal1["records"] <= wal2["records"], "post ckpt reset: wal records unaffected" + assert wal0["reset"] == wal2["reset"], "post ckpt reset: wal stats_reset equal" + node.restart() + ckpt3 = _checkpoint_stats(node) + wal3 = _wal_stats(node) + assert ckpt3["count"] < ckpt1["count"], "post ckpt reset & restart: still reset" + assert ( + ckpt3["reset"] == ckpt2["reset"] + ), "post ckpt reset & restart: stats_reset same" + assert ( + wal2["records"] < wal3["records"] + ), "post 
ckpt reset & restart: increased wal records" + assert ( + wal2["bytes"] < wal3["bytes"] + ), "post ckpt reset & restart: increased wal bytes" + assert ( + wal0["reset"] == wal3["reset"] + ), "post ckpt reset & restart: wal stats_reset equal" + node.safe_psql("SELECT pg_stat_reset_shared('wal')") + wal4 = _wal_stats(node) + assert wal4["records"] < wal3["records"], "post wal reset: smaller record count" + assert wal4["bytes"] < wal3["bytes"], "post wal reset: smaller bytes" + assert wal4["reset"] > wal3["reset"], "post wal reset: newer stats_reset" + node.restart() + wal5 = _wal_stats(node) + assert ( + wal5["records"] < wal3["records"] + ), "post wal reset & restart: smaller record count" + assert wal4["bytes"] < wal3["bytes"], "post wal reset & restart: smaller bytes" + assert wal4["reset"] > wal3["reset"], "post wal reset & restart: newer stats_reset" + node.stop("immediate") + node.start() + wal6 = _wal_stats(node) + assert ( + wal5["reset"] < wal6["reset"] + ), "post immediate restart: reset timestamp is new" + node.stop() diff --git a/src/test/recovery/pyt/test_030_stats_cleanup_replica.py b/src/test/recovery/pyt/test_030_stats_cleanup_replica.py new file mode 100644 index 0000000000000..9665405bd6012 --- /dev/null +++ b/src/test/recovery/pyt/test_030_stats_cleanup_replica.py @@ -0,0 +1,131 @@ +# Copyright (c) 2021-2026, PostgreSQL Global Development Group + +"""Port of src/test/recovery/t/030_stats_cleanup_replica.pl. + +Standbys drop stats when the drop records are replayed, persist stats across +graceful restarts, and discard stats after an immediate/crash restart. 
def _populate_standby_stats(primary, standby, connect_db, schema):
    """Create a table and function on the primary and use them on the standby.

    Returns ``(dboid, tableoid, funcoid)`` as looked up on the standby in
    *connect_db*; the objects live in *schema*.
    """
    create_table = (
        "CREATE TABLE {}.drop_tab_test1 AS SELECT generate_series(1,100) AS a".format(
            schema
        )
    )
    create_function = (
        "CREATE FUNCTION {}.drop_func_test1() RETURNS VOID AS 'select 2;' "
        "LANGUAGE SQL IMMUTABLE".format(schema)
    )
    primary.safe_psql(create_table, dbname=connect_db)
    primary.safe_psql(create_function, dbname=connect_db)
    primary.wait_for_catchup(standby)

    dboid = standby.safe_psql(
        "SELECT oid FROM pg_database WHERE datname = '{}'".format(connect_db),
        dbname=connect_db,
    )
    tableoid = standby.safe_psql(
        "SELECT '{}.drop_tab_test1'::regclass::oid".format(schema),
        dbname=connect_db,
    )
    funcoid = standby.safe_psql(
        "SELECT '{}.drop_func_test1()'::regprocedure::oid".format(schema),
        dbname=connect_db,
    )

    # Read the table and call the function on the standby to generate stats.
    for statement in (
        "SELECT * FROM {}.drop_tab_test1".format(schema),
        "SELECT {}.drop_func_test1()".format(schema),
    ):
        standby.safe_psql(statement, dbname=connect_db)
    return dboid, tableoid, funcoid


def _drop_function_by_oid(primary, connect_db, funcoid):
    """Resolve *funcoid* to its regprocedure text and drop that function."""
    lookup = "SELECT '{}'::regprocedure".format(funcoid)
    signature = primary.safe_psql(lookup, dbname=connect_db)
    primary.safe_psql("DROP FUNCTION {}".format(signature), dbname=connect_db)


def _drop_table_by_oid(primary, connect_db, tableoid):
    """Resolve *tableoid* to its regclass text and drop that table."""
    lookup = "SELECT '{}'::regclass".format(tableoid)
    relname = primary.safe_psql(lookup, dbname=connect_db)
    primary.safe_psql("DROP TABLE {}".format(relname), dbname=connect_db)


def _func_tab_status(standby, connect_db, oids, present, sect):
    """Assert relation and function stats on the standby both equal *present*.

    *oids* is the ``(dboid, tableoid, funcoid)`` triple; both lookups are run
    before the single assertion is evaluated.
    """
    dboid, tableoid, funcoid = oids
    observed = [
        standby.safe_psql(
            "SELECT pg_stat_have_stats('{}', {}, {})".format(kind, dboid, objid),
            dbname=connect_db,
        )
        for kind, objid in (("relation", tableoid), ("function", funcoid))
    ]
    assert all(
        value == present for value in observed
    ), "{}: standby stats as expected".format(sect)


def _db_status(standby, connect_db, dboid, present, sect):
    """Assert the standby's database-level stats entry equals *present*."""
    db_state = standby.safe_psql(
        "SELECT pg_stat_have_stats('database', {}, 0)".format(dboid),
        dbname=connect_db,
    )
    assert db_state == present, "{}: standby db stats as expected".format(sect)


def test_stats_cleanup_replica(create_pg):
    """Standby stats track drops, survive graceful restart, vanish on crash."""
    primary = create_pg("primary", allows_streaming=True, start=False)
    primary.append_conf("track_functions = 'all'")
    primary.start()
    primary.backup("my_backup")
    standby = create_pg(
        "standby",
        from_backup=(primary, "my_backup"),
        has_streaming=True,
        start=False,
    )
    standby.start()

    main_db = "postgres"

    # Objects dropped directly.
    oids = _populate_standby_stats(primary, standby, main_db, "public")
    _func_tab_status(standby, main_db, oids, "t", "initial")
    _, table_oid, function_oid = oids
    _drop_table_by_oid(primary, main_db, table_oid)
    _drop_function_by_oid(primary, main_db, function_oid)
    primary.wait_for_catchup(standby)
    _func_tab_status(standby, main_db, oids, "f", "post drop")

    # Objects dropped indirectly, through their schema.
    primary.safe_psql("CREATE SCHEMA drop_schema_test1")
    primary.wait_for_catchup(standby)
    oids = _populate_standby_stats(primary, standby, main_db, "drop_schema_test1")
    _func_tab_status(standby, main_db, oids, "t", "schema creation")
    primary.safe_psql("DROP SCHEMA drop_schema_test1 CASCADE")
    primary.wait_for_catchup(standby)
    _func_tab_status(standby, main_db, oids, "f", "post schema drop")

    # Whole database dropped; afterwards the checks connect to "postgres".
    primary.safe_psql("CREATE DATABASE test")
    primary.wait_for_catchup(standby)
    oids = _populate_standby_stats(primary, standby, "test", "public")
    _func_tab_status(standby, "test", oids, "t", "createdb")
    _db_status(standby, "test", oids[0], "t", "createdb")
    primary.safe_psql("DROP DATABASE test")
    primary.wait_for_catchup(standby)
    _func_tab_status(standby, main_db, oids, "f", "post dropdb")
    _db_status(standby, main_db, oids[0], "f", "post dropdb")

    # A graceful restart keeps the stats.
    oids = _populate_standby_stats(primary, standby, main_db, "public")
    _func_tab_status(standby, main_db, oids, "t", "pre restart")
    standby.restart()
    _func_tab_status(standby, main_db, oids, "t", "post non-immediate")

    # An immediate stop followed by a start discards them.
    standby.stop("immediate")
    standby.start()
    _func_tab_status(standby, main_db, oids, "f", "post immediate restart")
+""" + +import re + +import pypg + + +class _Conflicts: + """Mutable state shared with the nested check helpers.""" + + def __init__(self, standby, test_db): + self.standby = standby + self.test_db = test_db + self.log_location = standby.current_log_position() + self.sect = "" + + def check_log(self, message): + old = self.log_location + self.log_location = self.standby.wait_for_log(re.escape(message), old) + assert self.log_location > old, ( + "{}: logfile contains terminated connection due to recovery " + "conflict".format(self.sect) + ) + + def check_stat(self, conflict_type): + count = self.standby.safe_psql( + "SELECT confl_{} FROM pg_stat_database_conflicts WHERE " + "datname='{}';".format(conflict_type, self.test_db), + dbname=self.test_db, + ) + assert count == "1", "{}: stats show conflict on standby".format(self.sect) + + +def test_031_recovery_conflict(create_pg): + """Each recovery-conflict type is logged and counted on the standby.""" + tablespace1 = "test_recovery_conflict_tblspc" + primary = create_pg("primary", allows_streaming=True, start=False) + primary.append_conf( + "\nallow_in_place_tablespaces = on\nlog_temp_files = 0\n" + "max_prepared_transactions = 10\nmax_standby_streaming_delay = 50ms\n" + "temp_tablespaces = {}\nlog_recovery_conflict_waits = on\n" + "deadlock_timeout = 10ms\n".format(tablespace1) + ) + primary.start() + backup_name = "my_backup" + primary.safe_psql("CREATE TABLESPACE {} LOCATION ''".format(tablespace1)) + primary.backup(backup_name) + standby = create_pg( + "standby", + from_backup=(primary, backup_name), + has_streaming=True, + start=False, + ) + standby.start() + test_db = "test_db" + primary.safe_psql("CREATE DATABASE {}".format(test_db)) + table1 = "test_recovery_conflict_table1" + table2 = "test_recovery_conflict_table2" + primary.safe_psql( + "CREATE TABLE {t1}(a int, b int);\n" + "INSERT INTO {t1} SELECT i % 3, 0 FROM generate_series(1,20) i;\n" + "CREATE TABLE {t2}(a int, b int);".format(t1=table1, t2=table2), + 
dbname=test_db, + ) + primary.wait_for_replay_catchup(standby) + psql = standby.background_psql(test_db, on_error_stop=False) + state = _Conflicts(standby, test_db) + cursor1 = "test_recovery_conflict_cursor" + expected = 0 + expected += _buffer_pin(primary, standby, psql, state, table1, cursor1, test_db) + expected += _snapshot(primary, standby, psql, state, table1, cursor1, test_db) + expected += _lock(primary, standby, psql, state, table1, cursor1, test_db) + expected += _tablespace( + primary, standby, psql, state, tablespace1, cursor1, test_db + ) + expected += _startup_deadlock( + primary, standby, psql, state, table1, table2, cursor1, test_db + ) + assert standby.safe_psql( + "SELECT conflicts FROM pg_stat_database WHERE datname='{}';".format(test_db), + dbname=test_db, + ) == str(expected), "{} recovery conflicts shown in pg_stat_database".format( + expected + ) + state.sect = "database conflict" + primary.safe_psql("DROP DATABASE {};".format(test_db)) + primary.wait_for_replay_catchup(standby) + state.check_log("User was connected to a database that must be dropped") + psql.quit() + standby.stop() + primary.stop() + + +def _buffer_pin(primary, standby, psql, state, table1, cursor1, test_db): + state.sect = "buffer pin conflict" + primary.safe_psql( + "BEGIN;\nINSERT INTO {t} VALUES (1,0);\nROLLBACK;\n" + "BEGIN; LOCK {t}; COMMIT;".format(t=table1), + dbname=test_db, + ) + primary.wait_for_replay_catchup(standby) + res = psql.query_safe( + "BEGIN;\nDECLARE {c} CURSOR FOR SELECT b FROM {t};\n" + "FETCH FORWARD FROM {c};".format(c=cursor1, t=table1) + ) + assert re.search( + r"^0$", res, re.M + ), "{}: cursor with conflicting pin established".format(state.sect) + state.log_location = standby.current_log_position() + primary.safe_psql("VACUUM FREEZE {};".format(table1), dbname=test_db) + primary.wait_for_replay_catchup(standby) + state.check_log("User was holding shared buffer pin for too long") + psql.reconnect_and_clear() + state.check_stat("bufferpin") + 
return 1 + + +def _snapshot(primary, standby, psql, state, table1, cursor1, test_db): + state.sect = "snapshot conflict" + primary.safe_psql( + "INSERT INTO {} SELECT i, 0 FROM generate_series(1,20) i".format(table1), + dbname=test_db, + ) + primary.wait_for_replay_catchup(standby) + res = psql.query_safe( + "BEGIN;\nDECLARE {c} CURSOR FOR SELECT b FROM {t};\n" + "FETCH FORWARD FROM {c};".format(c=cursor1, t=table1) + ) + assert re.search( + r"^0$", res, re.M + ), "{}: cursor with conflicting snapshot established".format(state.sect) + primary.safe_psql( + "UPDATE {} SET a = a + 1 WHERE a > 2;".format(table1), dbname=test_db + ) + primary.safe_psql("VACUUM FREEZE {};".format(table1), dbname=test_db) + primary.wait_for_replay_catchup(standby) + state.check_log( + "User query might have needed to see row versions that must be removed" + ) + psql.reconnect_and_clear() + state.check_stat("snapshot") + return 1 + + +def _lock(primary, standby, psql, state, table1, _cursor1, test_db): + state.sect = "lock conflict" + res = psql.query_safe( + "BEGIN;\nLOCK TABLE {} IN ACCESS SHARE MODE;\nSELECT 1;".format(table1) + ) + assert re.search(r"^1$", res, re.M), "{}: conflicting lock acquired".format( + state.sect + ) + primary.safe_psql("DROP TABLE {};".format(table1), dbname=test_db) + primary.wait_for_replay_catchup(standby) + state.check_log("User was holding a relation lock for too long") + psql.reconnect_and_clear() + state.check_stat("lock") + return 1 + + +def _tablespace(primary, standby, psql, state, tablespace1, cursor1, test_db): + state.sect = "tablespace conflict" + res = psql.query_safe( + "BEGIN;\nSET work_mem = '64kB';\nDECLARE {c} CURSOR FOR\n" + " SELECT count(*) FROM generate_series(1,6000);\n" + "FETCH FORWARD FROM {c};".format(c=cursor1) + ) + assert re.search( + r"^6000$", res, re.M + ), "{}: cursor with conflicting temp file established".format(state.sect) + primary.safe_psql("DROP TABLESPACE {};".format(tablespace1), dbname=test_db) + 
def _startup_deadlock(primary, standby, psql, state, table1, table2, cursor1, test_db):
    """Run the "startup deadlock" scenario and report one conflict.

    Parameters are the primary and standby nodes, the standby background
    session ``psql``, the shared ``state`` checker, the two table names, the
    cursor name and the database to run primary-side SQL in.

    Returns 1, which the caller adds to its running count of expected
    conflicts.
    """
    state.sect = "startup deadlock"
    # Raise max_standby_streaming_delay to the default test timeout for this
    # scenario; the setting only takes effect here after the restart, and the
    # background session has to be re-established afterwards.
    standby.adjust_conf(
        "max_standby_streaming_delay",
        "{}s".format(pypg.test_timeout_default()),
    )
    standby.restart()
    psql.reconnect_and_clear()
    # On the primary: (re)create table1 with some rolled-back inserts, then
    # leave a prepared transaction named 'lock' that holds a lock on table2.
    primary.safe_psql(
        "CREATE TABLE {t1}(a int, b int);\nINSERT INTO {t1} VALUES (1);\n"
        "BEGIN;\nINSERT INTO {t1}(a) SELECT generate_series(1, 100) i;\n"
        "ROLLBACK;\nBEGIN;\nLOCK TABLE {t2};\nPREPARE TRANSACTION 'lock';\n"
        "INSERT INTO {t1}(a) VALUES (170);\nSELECT txid_current();".format(
            t1=table1, t2=table2
        ),
        dbname=test_db,
    )
    primary.wait_for_replay_catchup(standby)
    # On the standby: open a cursor over table1, wait for its first row ("1"),
    # then issue a SELECT on table2 without waiting for that result.
    psql.query_until(
        re.compile(r"^1$", re.M),
        "BEGIN;\nDECLARE {c} CURSOR FOR SELECT a FROM {t1};\n"
        "FETCH FORWARD FROM {c};\nSELECT * FROM {t2};\n".format(
            c=cursor1, t1=table1, t2=table2
        ),
    )
    # Proceed only once some relation lock request on the standby is ungranted
    # (presumably the SELECT on table2 queued behind the replayed lock).
    assert standby.poll_query_until(
        "SELECT 'waiting' FROM pg_locks WHERE locktype = 'relation' AND NOT "
        "granted;",
        "waiting",
    ), "{}: lock acquisition is waiting".format(state.sect)
    # VACUUM FREEZE table1 on the primary and let the standby replay it; the
    # log message checked next names a buffer deadlock with recovery.
    primary.safe_psql("VACUUM FREEZE {};".format(table1), dbname=test_db)
    primary.wait_for_replay_catchup(standby)
    state.check_log("User transaction caused buffer deadlock with recovery.")
    psql.reconnect_and_clear()
    state.check_stat("deadlock")
    # Clean up: drop the prepared transaction and put the delay back to 50ms
    # (NOTE(review): assumed to be the value the standby was configured with
    # before this scenario -- confirm against the test setup).
    primary.safe_psql("ROLLBACK PREPARED 'lock';", dbname=test_db)
    standby.adjust_conf("max_standby_streaming_delay", "50ms")
    standby.restart()
    psql.reconnect_and_clear()
    return 1
def _verify(primary, standby, counter, message):
    """Check that ``large`` holds 4000 rows of ``counter`` on both nodes.

    The same aggregate query runs against ``conflict_db`` on the primary
    and, after ``primary.wait_for_catchup(standby)``, on the standby.  Each
    result must equal ``"<counter>|4000"``; ``message`` is appended to the
    assertion text, prefixed with the node that disagreed.
    """
    sql = "SELECT datab, count(*) FROM large GROUP BY 1 ORDER BY 1 LIMIT 10"
    wanted = "{}|4000".format(counter)

    on_primary = primary.safe_psql(sql, dbname="conflict_db")
    assert on_primary == wanted, "primary: {}".format(message)

    primary.wait_for_catchup(standby)

    on_standby = standby.safe_psql(sql, dbname="conflict_db")
    assert on_standby == wanted, "standby: {}".format(message)
on_error_stop=False, tuples_only=False, quiet=False + ) + primary.safe_psql("CREATE DATABASE conflict_db_template OID = 50000;") + primary.safe_psql( + "CREATE TABLE large(id serial primary key, dataa text, datab text);\n" + "INSERT INTO large(dataa, datab) SELECT g.i::text, 1 " + "FROM generate_series(1, 4000) g(i);", + dbname="conflict_db_template", + ) + primary.safe_psql( + "CREATE DATABASE conflict_db TEMPLATE conflict_db_template OID = 50001;" + ) + primary.safe_psql( + "CREATE EXTENSION pg_prewarm;\n" + "CREATE TABLE replace_sb(data text);\n" + "INSERT INTO replace_sb(data) SELECT random()::text " + "FROM generate_series(1, 15000);" + ) + primary.wait_for_catchup(standby) + _send_wait(psql_primary, "BEGIN;", r"BEGIN") + _send_wait(psql_standby, "BEGIN;", r"BEGIN") + primary.safe_psql("UPDATE large SET datab = 1;", dbname="conflict_db") + _cause_eviction(psql_primary, psql_standby) + primary.safe_psql("DROP DATABASE conflict_db;") + primary.safe_psql( + "CREATE DATABASE conflict_db TEMPLATE conflict_db_template OID = 50001;" + ) + _verify(primary, standby, 1, "initial contents as expected") + primary.safe_psql("UPDATE large SET datab = 2;", dbname="conflict_db") + _cause_eviction(psql_primary, psql_standby) + _verify( + primary, + standby, + 2, + "update to reused relfilenode (due to DB oid conflict) is not lost", + ) + primary.safe_psql("VACUUM FULL large;", dbname="conflict_db") + primary.safe_psql("UPDATE large SET datab = 3;", dbname="conflict_db") + _verify(primary, standby, 3, "restored contents as expected") + primary.safe_psql("CREATE TABLESPACE test_tablespace LOCATION ''") + primary.safe_psql("UPDATE large SET datab = 4;", dbname="conflict_db") + _cause_eviction(psql_primary, psql_standby) + primary.safe_psql("ALTER DATABASE conflict_db SET TABLESPACE test_tablespace") + primary.safe_psql("ALTER DATABASE conflict_db SET TABLESPACE pg_default") + primary.safe_psql("UPDATE large SET datab = 5;", dbname="conflict_db") + _cause_eviction(psql_primary, 
psql_standby) + _verify(primary, standby, 5, "post move contents as expected") + primary.safe_psql("ALTER DATABASE conflict_db SET TABLESPACE test_tablespace") + primary.safe_psql("UPDATE large SET datab = 7;", dbname="conflict_db") + _cause_eviction(psql_primary, psql_standby) + primary.safe_psql("UPDATE large SET datab = 8;", dbname="conflict_db") + primary.safe_psql("DROP DATABASE conflict_db") + primary.safe_psql("DROP TABLESPACE test_tablespace") + primary.safe_psql("REINDEX TABLE pg_database") + psql_primary.quit() + psql_standby.quit() + primary.stop() + standby.stop() + pg_bin.command_like( + ["pg_controldata", primary.datadir], + r"Database cluster state:\s+shut down\n", + "primary shut down ok", + ) + pg_bin.command_like( + ["pg_controldata", standby.datadir], + r"Database cluster state:\s+shut down in recovery\n", + "standby shut down ok", + ) diff --git a/src/test/recovery/pyt/test_033_replay_tsp_drops.py b/src/test/recovery/pyt/test_033_replay_tsp_drops.py new file mode 100644 index 0000000000000..76c9b3f8a9109 --- /dev/null +++ b/src/test/recovery/pyt/test_033_replay_tsp_drops.py @@ -0,0 +1,102 @@ +# Copyright (c) 2021-2026, PostgreSQL Global Development Group + +"""Port of src/test/recovery/t/033_replay_tsp_drops.pl. + +Replay of tablespace/database creation and drop, including replaying CREATE +DATABASE WAL records against already-removed directories. 
def _test_tablespace(create_pg, strategy):
    """Replay the create/drop workload for one CREATE DATABASE strategy.

    A primary and a slot-fed streaming standby are created, the ``_WORKLOAD``
    script is run on the primary with ``strategy`` substituted in, and the
    standby is then stopped in immediate mode and must start again.
    """
    backup_name = "my_backup"
    setup_statements = [
        "SET allow_in_place_tablespaces=on;",
        "CREATE TABLESPACE dropme_ts1 LOCATION '';",
        "CREATE TABLESPACE dropme_ts2 LOCATION '';",
        "CREATE TABLESPACE source_ts LOCATION '';",
        "CREATE TABLESPACE target_ts LOCATION '';",
        "CREATE DATABASE template_db IS_TEMPLATE = true;",
        "SELECT pg_create_physical_replication_slot('slot', true);",
    ]

    primary = create_pg("primary1_" + strategy, allows_streaming=True)
    primary.safe_psql("\n".join(setup_statements))
    primary.backup(backup_name)

    standby = create_pg(
        "standby2_" + strategy,
        from_backup=(primary, backup_name),
        has_streaming=True,
        start=False,
    )
    for setting in (
        "allow_in_place_tablespaces = on",
        "primary_slot_name = slot",
    ):
        standby.append_conf(setting)
    standby.start()
    primary.wait_for_catchup(standby, "write")

    # Stopping the standby immediately after this workload means CREATE
    # DATABASE WAL records get applied to directories that are already gone.
    workload_sql = _WORKLOAD.format(strategy=strategy)
    primary.safe_psql(workload_sql)
    primary.wait_for_catchup(standby, "write")

    standby.safe_psql("ALTER SYSTEM SET log_min_messages TO debug1;")
    standby.stop("immediate")
    came_back = standby.start(fail_ok=True)
    assert came_back is True, "standby node started for " + strategy
    standby.stop("immediate")
def test_create_database(create_pg):
    """DDL on the template persists; the new database has no template tables."""
    template_db = "template1"
    new_db = "test_db_1"
    count_sql = "SELECT count(*) FROM pg_class WHERE relname LIKE 'tab_db_%';"

    node = create_pg("node")

    # Create the new database with the WAL_LOG strategy (which uses a direct
    # copy of the template's pg_class), then add a table to the template.  The
    # template-side DDL must survive, and must not leak into the new database.
    create_sql = "CREATE DATABASE {} STRATEGY WAL_LOG TEMPLATE {};".format(
        new_db, template_db
    )
    node.safe_psql(create_sql)
    node.safe_psql("CREATE TABLE tab_db_after_create_1 (a INT);", dbname=template_db)

    # Checkpoint so the template changes are flushed, then crash and replay.
    node.safe_psql("CHECKPOINT;")
    node.stop("immediate")
    node.start()

    tables_in_template = node.safe_psql(count_sql, dbname=template_db)
    assert (
        tables_in_template == "1"
    ), "table exists on template after crash, with checkpoint"

    tables_in_new_db = node.safe_psql(count_sql, dbname=new_db)
    assert (
        tables_in_new_db == "0"
    ), "no tables from template on new database after crash"
class _RecvLogical:
    """A background ``pg_recvlogical --start`` process for a standby slot.

    The Python analogue of the ``IPC::Run::start`` handle the Perl test keeps in
    ``$handle``: stdout/stderr are captured by reader threads so the test can
    poll the accumulated stdout for a pattern (mirroring ``pump_until``) and, on
    ``finish``, inspect the exit code and stderr (mirroring
    ``check_pg_recvlogical_stderr``).
    """

    def __init__(self, bindir, slot_name, connstr, env, timeout):
        """Spawn pg_recvlogical and start one reader thread per pipe.

        bindir    -- directory holding ``pg_recvlogical``; it is joined with
                     ``/``, so it must support that operator (e.g. a Path).
        slot_name -- replication slot passed via ``--slot``.
        connstr   -- connection string passed via ``--dbname``.
        env       -- environment mapping for the child process.
        timeout   -- seconds allowed for ``pump_until`` and for process waits.
        """
        self._cmd = [
            str(bindir / "pg_recvlogical"),
            "--dbname",
            connstr,
            "--slot",
            slot_name,
            "--option",
            "include-xids=0",
            "--option",
            "skip-empty-xacts=1",
            "--file",
            "-",
            "--no-loop",
            "--start",
        ]
        self._timeout = timeout
        # Guards _stdout/_stderr, which the reader threads append to.
        self._lock = threading.Lock()
        self._stdout = ""
        self._stderr = ""
        # pylint: disable=consider-using-with # long-lived; closed in finish()
        self._proc = subprocess.Popen(
            self._cmd,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            env=env,
            encoding="utf-8",
            errors="replace",
            bufsize=1,
        )
        self._threads = [
            threading.Thread(target=self._reader, args=("out",), daemon=True),
            threading.Thread(target=self._reader, args=("err",), daemon=True),
        ]
        for thread in self._threads:
            thread.start()

    def _reader(self, which):
        """Append every line of one pipe ("out" or "err") to its buffer."""
        stream = self._proc.stdout if which == "out" else self._proc.stderr
        assert stream is not None
        for line in iter(stream.readline, ""):
            with self._lock:
                if which == "out":
                    self._stdout += line
                else:
                    self._stderr += line

    def _join_readers(self, timeout):
        """Wait up to ``timeout`` seconds in total for the readers to hit EOF."""
        deadline = time.monotonic() + timeout
        for thread in self._threads:
            thread.join(timeout=max(deadline - time.monotonic(), 0))

    def _close_pipes(self):
        """Give the readers a moment to finish, then close both pipes."""
        self._join_readers(1)
        for stream in (self._proc.stdout, self._proc.stderr):
            if stream is not None and not stream.closed:
                stream.close()

    @property
    def stdout(self):
        """The accumulated stdout captured so far."""
        with self._lock:
            return self._stdout

    @property
    def stderr(self):
        """The accumulated stderr captured so far."""
        with self._lock:
            return self._stderr

    def pump_until(self, pattern):
        """Poll captured stdout until pattern matches; raise on timeout.

        Mirrors ``PostgreSQL::Test::Utils::pump_until`` over the recvlogical
        handle: the regex is applied (DOTALL) to all stdout seen so far.

        Raises AssertionError if the process exits without ever producing a
        match, and TimeoutError if it is still running when the timeout
        given to the constructor elapses.
        """
        regex = re.compile(pattern, re.DOTALL)
        deadline = time.monotonic() + self._timeout
        while True:
            if regex.search(self.stdout):
                return
            if self._proc.poll() is not None:
                # The child has exited, but its last lines may still be
                # sitting in the pipes.  Let the reader threads reach EOF
                # before judging the output; otherwise a process that prints
                # the expected text and exits right away is reported as a
                # failure depending on thread scheduling.
                self._join_readers(max(deadline - time.monotonic(), 1))
                if regex.search(self.stdout):
                    return
                raise AssertionError(
                    "pg_recvlogical exited before producing expected output;"
                    " stdout:\n{}\nstderr:\n{}".format(self.stdout, self.stderr)
                )
            if time.monotonic() > deadline:
                raise TimeoutError(
                    "timed out waiting for pg_recvlogical stdout to match {!r};"
                    " stdout:\n{}\nstderr:\n{}".format(
                        pattern, self.stdout, self.stderr
                    )
                )
            time.sleep(0.05)

    def finish(self):
        """Wait for the process to exit and return its exit code.

        Mirrors ``$handle->finish``: closes stdout/stderr after the child is
        done so the captured buffers are complete.  Only valid once the server
        has terminated the client (e.g. after a slot conflict); a still-
        streaming ``--no-loop --start`` would never exit on its own, in which
        case ``subprocess.TimeoutExpired`` propagates from the wait.
        """
        returncode = self._proc.wait(timeout=self._timeout)
        self._close_pipes()
        return returncode

    def terminate(self):
        """Kill the background process if it is still running and reap it.

        Used at test cleanup to mirror the Perl harness tearing down a cluster
        with a still-attached pg_recvlogical (which never exits on its own).
        """
        if self._proc.poll() is None:
            self._proc.terminate()
            try:
                self._proc.wait(timeout=self._timeout)
            except subprocess.TimeoutExpired:
                # Did not react to SIGTERM in time; force it down.
                self._proc.kill()
                self._proc.wait(timeout=self._timeout)
        self._close_pipes()
def _make_slot_active(ctx, node, slot_prefix, wait):
    """Attach a background pg_recvlogical to ``<slot_prefix>activeslot``.

    The new handle is recorded in ``ctx.handles`` and returned.  When
    ``wait`` is true the function also polls ``node`` until the slot shows a
    non-NULL ``active_pid``; callers pass a false value when the attach is
    expected to fail.
    """
    slot = slot_prefix + "activeslot"
    connstr = node.connstr("testdb")
    env = node._connenv()  # pylint: disable=protected-access
    recv = _RecvLogical(ctx.bindir, slot, connstr, env, ctx.timeout)
    ctx.handles.append(recv)

    if not wait:
        return recv

    active_check = (
        "SELECT EXISTS (SELECT 1 FROM pg_replication_slots "
        "WHERE slot_name = '{}' AND active_pid IS NOT NULL)"
    ).format(slot)
    became_active = node.poll_query_until(active_check, dbname="testdb")
    assert became_active, "slot never became active"
    return recv
def _check_slots_conflict_reason(ctx, slot_prefix, reason):
    """Require ``reason`` as invalidation_reason of both prefixed slots.

    The active slot is checked first, then the inactive one; each lookup on
    the standby is restricted to rows flagged ``conflicting``.
    """
    lookup = (
        "select invalidation_reason from pg_replication_slots "
        "where slot_name = '{}' and conflicting;"
    )
    slot_names = [slot_prefix + suffix for suffix in ("activeslot", "inactiveslot")]
    for name in slot_names:
        found = ctx.standby.safe_psql(lookup.format(name))
        assert found == reason, "{} reason for conflict is {}".format(name, reason)
def _init_primary(ctx, create_pg):
    """Bring up the primary, take backup "b1" and return that backup name.

    The node is stored on ``ctx.primary`` before it is configured.  The test
    is skipped when the ``injection_points`` extension is not available.
    """
    settings = (
        "wal_level = 'logical'",
        "max_replication_slots = 4",
        "max_wal_senders = 4",
        "autovacuum = off",
    )
    backup_name = "b1"

    node = create_pg(
        "primary", allows_streaming=True, has_archiving=True, start=False
    )
    ctx.primary = node
    node.append_conf("\n" + "\n".join(settings) + "\n")
    node.dump_info()
    node.start()

    has_injection_points = node.check_extension("injection_points")
    if not has_injection_points:
        pytest.skip("Extension injection_points not installed")

    node.psql_capture("CREATE DATABASE testdb")
    create_slot_sql = "SELECT * FROM pg_create_physical_replication_slot('{}');".format(
        _PRIMARY_SLOTNAME
    )
    node.safe_psql(create_slot_sql, dbname="testdb")

    conflicting_is_null = node.safe_psql(
        "SELECT conflicting is null FROM pg_replication_slots "
        "where slot_name = '{}';".format(_PRIMARY_SLOTNAME)
    )
    assert conflicting_is_null == "t", "Physical slot reports conflicting as NULL"

    node.backup(backup_name)
    # VACUUM does not flush WAL by itself; inserting into flush_wal afterwards
    # gives the tests a way to force that flush.
    node.psql_capture("CREATE TABLE flush_wal();", dbname="testdb")
    return backup_name
ctx.primary.wait_for_replay_catchup(ctx.standby) + result = ctx.standby.safe_psql( + "SELECT pg_logical_slot_get_changes('behaves_ok_activeslot', NULL, NULL);", + dbname="testdb", + ) + assert ( + len(result.split("\n")) == 14 + ), "Decoding produced 14 rows (2 BEGIN/COMMIT and 10 rows)" + ctx.primary.safe_psql( + "INSERT INTO decoding_test(x,y) SELECT s, s::text " + "FROM generate_series(1,4) s;", + dbname="testdb", + ) + ctx.primary.wait_for_replay_catchup(ctx.standby) + stdout_sql = ctx.standby.safe_psql( + "SELECT data FROM pg_logical_slot_peek_changes('behaves_ok_activeslot', " + "NULL, NULL, 'include-xids', '0', 'skip-empty-xacts', '1');", + dbname="testdb", + ) + assert stdout_sql == _EXPECTED_BASIC, "got expected output from SQL decoding" + endpos = ctx.standby.safe_psql( + "SELECT lsn FROM pg_logical_slot_peek_changes('behaves_ok_activeslot', " + "NULL, NULL) ORDER BY lsn DESC LIMIT 1;", + dbname="testdb", + ) + ctx.primary.safe_psql( + "INSERT INTO decoding_test(x,y) SELECT s, s::text " + "FROM generate_series(5,50) s;", + dbname="testdb", + ) + ctx.primary.wait_for_replay_catchup(ctx.standby) + opts = {"include-xids": "0", "skip-empty-xacts": "1"} + stdout_recv = ctx.standby.pg_recvlogical_upto( + "testdb", "behaves_ok_activeslot", endpos, ctx.timeout, options=opts + ) + assert ( + stdout_recv.rstrip("\n") == _EXPECTED_BASIC + ), "got same expected output from pg_recvlogical decoding session" + assert ctx.standby.poll_query_until( + "SELECT EXISTS (SELECT 1 FROM pg_replication_slots " + "WHERE slot_name = 'behaves_ok_activeslot' AND active_pid IS NULL)", + dbname="testdb", + ), "slot never became inactive" + stdout_recv = ctx.standby.pg_recvlogical_upto( + "testdb", "behaves_ok_activeslot", endpos, ctx.timeout, options=opts + ) + assert stdout_recv.rstrip("\n") == "", "pg_recvlogical acknowledged changes" + ctx.primary.safe_psql("CREATE DATABASE otherdb") + ctx.primary.wait_for_replay_catchup(ctx.standby) + res = ctx.standby.psql_capture( + "SELECT lsn 
def _test_subscribe_on_standby(ctx):
    """Subscribe on the standby to a primary publication and verify replication.

    Uses ``ctx.primary``, ``ctx.standby`` and ``ctx.subscriber``; the
    subscriber node is stopped at the end.
    """
    # Same table on publisher and subscriber; publication only on the primary.
    ctx.primary.safe_psql("CREATE TABLE tab_rep (a int primary key)")
    ctx.subscriber.safe_psql("CREATE TABLE tab_rep (a int primary key)")
    ctx.primary.safe_psql("CREATE PUBLICATION tap_pub for table tab_rep")
    ctx.primary.wait_for_replay_catchup(ctx.standby)
    # The subscription connects to the standby, not to the primary.
    standby_connstr = ctx.standby.connstr() + " dbname=postgres"
    # Use a background psql so we can run pg_log_standby_snapshot() on the
    # primary while CREATE SUBSCRIPTION is still waiting.
    sub_psql = ctx.subscriber.background_psql()
    # send() is used rather than a blocking query so the call below can run
    # while the statement is in flight.
    sub_psql.send(
        "CREATE SUBSCRIPTION tap_sub\n"
        " CONNECTION '{}'\n"
        " PUBLICATION tap_pub\n"
        " WITH (copy_data = off);\n".format(standby_connstr)
    )
    # Presumably logs a standby snapshot on the primary until the standby
    # slot "tap_sub" can be created -- confirm against the helper.
    ctx.primary.log_standby_snapshot(ctx.standby, "tap_sub")
    sub_psql.quit()
    ctx.subscriber.wait_for_subscription_sync(ctx.standby, "tap_sub")
    # Rows inserted on the primary must reach the subscriber via the standby.
    ctx.primary.safe_psql("INSERT INTO tab_rep select generate_series(1,10);")
    ctx.primary.wait_for_replay_catchup(ctx.standby)
    ctx.standby.wait_for_catchup("tap_sub")
    assert (
        ctx.subscriber.safe_psql("SELECT count(*) FROM tab_rep") == "10"
    ), "check replicated inserts after subscription on standby"
    # The subscriber is not needed by later steps.
    ctx.subscriber.safe_psql("DROP SUBSCRIPTION tap_sub")
    ctx.subscriber.stop()
pg_stat_replication_slots " + "WHERE slot_name = 'vacuum_full_activeslot'", + dbname="testdb", + ), "replication slot stats of vacuum_full_activeslot not updated" + _wait_until_vacuum_can_remove( + ctx, + "full", + _SQL_DROP_CONFLICT_TABLE, + "pg_class", + ) + _check_for_invalidation(ctx, "vacuum_full_", 1, "with vacuum FULL on pg_class") + _check_slots_conflict_reason(ctx, "vacuum_full_", "rows_removed") + res = ctx.standby.psql_capture( + "ALTER_REPLICATION_SLOT vacuum_full_inactiveslot (failover);", + replication="database", + ) + assert re.search( + r'ERROR: can no longer access replication slot "vacuum_full_inactiveslot"', + res.stderr, + ) and re.search( + r'DETAIL: This replication slot has been invalidated due to "rows_removed".', + res.stderr, + ), "invalidated slot cannot be altered" + assert ( + ctx.standby.safe_psql( + "SELECT total_txns > 0 FROM pg_stat_replication_slots " + "WHERE slot_name = 'vacuum_full_activeslot'", + dbname="testdb", + ) + == "t" + ), "replication slot stats not removed after invalidation" + ctx.handle = _make_slot_active(ctx, ctx.standby, "vacuum_full_", False) + _check_pg_recvlogical_stderr( + ctx.handle, + 'can no longer access replication slot "vacuum_full_activeslot"', + ) + res = ctx.standby.psql_capture( + "select pg_copy_logical_replication_slot('vacuum_full_inactiveslot', " + "'vacuum_full_inactiveslot_copy');", + replication="database", + ) + assert re.search( + r"ERROR: cannot copy invalidated replication slot " + r'"vacuum_full_inactiveslot"', + res.stderr, + ), "invalidated slot cannot be copied" + _change_hsf_and_wait_for_xmins(ctx, 1, 1) + + +def _test_invalidation_survives_restart_and_no_wal(ctx): + """Invalidated slots stay invalidated across restart and free WAL.""" + ctx.standby.restart() + _check_slots_conflict_reason(ctx, "vacuum_full_", "rows_removed") + restart_lsn = ctx.standby.safe_psql( + "SELECT restart_lsn FROM pg_replication_slots\n" + " WHERE slot_name = 'vacuum_full_activeslot' AND conflicting;" + ) 
+ walfile_name = ctx.primary.safe_psql( + "SELECT pg_walfile_name('{}')".format(restart_lsn) + ) + ctx.primary.advance_wal(1) + ctx.primary.safe_psql("checkpoint;") + ctx.primary.wait_for_replay_catchup(ctx.standby) + ctx.standby.safe_psql("checkpoint;") + standby_walfile = ctx.standby.datadir / "pg_wal" / walfile_name + assert ( + not standby_walfile.is_file() + ), "invalidated logical slots do not lead to retaining WAL" + + +def _test_row_removal_scenario(ctx): + """Scenario 2: conflict due to row removal (VACUUM on pg_class).""" + logstart = ctx.standby.current_log_position() + _reactive_slots_change_hfs_and_wait_for_xmins( + ctx, "vacuum_full_", "row_removal_", 0, 1 + ) + _wait_until_vacuum_can_remove( + ctx, + "", + _SQL_DROP_CONFLICT_TABLE, + "pg_class", + ) + _check_for_invalidation(ctx, "row_removal_", logstart, "with vacuum on pg_class") + _check_slots_conflict_reason(ctx, "row_removal_", "rows_removed") + ctx.handle = _make_slot_active(ctx, ctx.standby, "row_removal_", False) + _check_pg_recvlogical_stderr( + ctx.handle, + 'can no longer access replication slot "row_removal_activeslot"', + ) + + +def _test_shared_row_removal_scenario(ctx): + """Scenario 3: conflict due to row removal on a shared catalog (pg_authid).""" + logstart = ctx.standby.current_log_position() + _reactive_slots_change_hfs_and_wait_for_xmins( + ctx, "row_removal_", "shared_row_removal_", 0, 1 + ) + _wait_until_vacuum_can_remove( + ctx, + "", + "CREATE ROLE create_trash;\nDROP ROLE create_trash;", + "pg_authid", + ) + _check_for_invalidation( + ctx, "shared_row_removal_", logstart, "with vacuum on pg_authid" + ) + _check_slots_conflict_reason(ctx, "shared_row_removal_", "rows_removed") + ctx.handle = _make_slot_active(ctx, ctx.standby, "shared_row_removal_", False) + _check_pg_recvlogical_stderr( + ctx.handle, + 'can no longer access replication slot "shared_row_removal_activeslot"', + ) + + +def _test_no_conflict_scenario(ctx): + """Scenario 4: VACUUM on a non-catalog table; no 
conflict expected.""" + logstart = ctx.standby.current_log_position() + _reactive_slots_change_hfs_and_wait_for_xmins( + ctx, "shared_row_removal_", "no_conflict_", 0, 1 + ) + _wait_until_vacuum_can_remove( + ctx, + "", + "CREATE TABLE conflict_test(x integer, y text);\n" + "INSERT INTO conflict_test(x,y) SELECT s, s::text " + "FROM generate_series(1,4) s;\n" + "UPDATE conflict_test set x=1, y=1;", + "conflict_test", + ) + assert not ctx.standby.log_matches( + 'invalidating obsolete replication slot "no_conflict_inactiveslot"', + logstart, + ), "inactiveslot slot invalidation is not logged with vacuum on conflict_test" + assert not ctx.standby.log_matches( + 'invalidating obsolete replication slot "no_conflict_activeslot"', + logstart, + ), "activeslot slot invalidation is not logged with vacuum on conflict_test" + assert ctx.standby.poll_query_until( + "select (confl_active_logicalslot = 0) from pg_stat_database_conflicts " + "where datname = 'testdb'" + ), "Timed out waiting confl_active_logicalslot to be updated" + assert ( + ctx.standby.safe_psql( + "select bool_or(conflicting) from\n" + " (select conflicting from pg_replication_slots\n" + " where slot_type = 'logical')" + ) + == "f" + ), "Logical slots are reported as non conflicting" + _change_hsf_and_wait_for_xmins(ctx, 1, 0) + ctx.standby.restart() + + +def _test_pruning_scenario(ctx): + """Scenario 5: conflict due to on-access pruning of a user_catalog_table.""" + logstart = ctx.standby.current_log_position() + _reactive_slots_change_hfs_and_wait_for_xmins(ctx, "no_conflict_", "pruning_", 0, 0) + ctx.primary.safe_psql( + "SELECT injection_points_attach('skip-log-running-xacts', 'error');", + dbname="testdb", + ) + ctx.primary.safe_psql( + "CREATE TABLE prun(id integer, s char(2000)) " + "WITH (fillfactor = 75, user_catalog_table = true);", + dbname="testdb", + ) + ctx.primary.safe_psql("INSERT INTO prun VALUES (1, 'A');", dbname="testdb") + for letter in ("B", "C", "D", "E"): + ctx.primary.safe_psql( + 
"UPDATE prun SET s = '{}';".format(letter), dbname="testdb" + ) + ctx.primary.wait_for_replay_catchup(ctx.standby) + ctx.primary.safe_psql( + "SELECT injection_points_detach('skip-log-running-xacts');", + dbname="testdb", + ) + _check_for_invalidation(ctx, "pruning_", logstart, "with on-access pruning") + _check_slots_conflict_reason(ctx, "pruning_", "rows_removed") + ctx.handle = _make_slot_active(ctx, ctx.standby, "pruning_", False) + _check_pg_recvlogical_stderr( + ctx.handle, + 'can no longer access replication slot "pruning_activeslot"', + ) + _change_hsf_and_wait_for_xmins(ctx, 1, 1) + + +def _test_wal_level_scenario(ctx): + """Scenario 6: lowering primary wal_level invalidates the slots.""" + logstart = ctx.standby.current_log_position() + _drop_logical_slots(ctx, "pruning_") + _create_logical_slots(ctx, ctx.standby, "wal_level_") + ctx.handle = _make_slot_active(ctx, ctx.standby, "wal_level_", True) + ctx.standby.psql_capture("select pg_stat_reset();", dbname="testdb") + ctx.primary.append_conf("\nwal_level = 'replica'\n") + ctx.primary.restart() + ctx.primary.wait_for_replay_catchup(ctx.standby) + _check_for_invalidation(ctx, "wal_level_", logstart, "due to wal_level") + _check_slots_conflict_reason(ctx, "wal_level_", "wal_level_insufficient") + ctx.handle = _make_slot_active(ctx, ctx.standby, "wal_level_", False) + _check_pg_recvlogical_stderr( + ctx.handle, + 'logical decoding on standby requires "effective_wal_level" >= ' + '"logical" on the primary', + ) + ctx.primary.append_conf("\nwal_level = 'logical'\n") + ctx.primary.restart() + ctx.primary.wait_for_replay_catchup(ctx.standby) + ctx.handle = _make_slot_active(ctx, ctx.standby, "wal_level_", False) + _check_pg_recvlogical_stderr( + ctx.handle, + 'can no longer access replication slot "wal_level_activeslot"', + ) + + +def _test_drop_database_scenario(ctx): + """DROP DATABASE drops its standby slots, including active ones.""" + _drop_logical_slots(ctx, "wal_level_") + _create_logical_slots(ctx, 
ctx.standby, "drop_db_") + ctx.handle = _make_slot_active(ctx, ctx.standby, "drop_db_", True) + ctx.standby.create_logical_slot_on_standby(ctx.primary, "otherslot", "postgres") + ctx.primary.safe_psql("DROP DATABASE testdb") + ctx.primary.wait_for_replay_catchup(ctx.standby) + assert ( + ctx.standby.safe_psql( + "SELECT EXISTS(SELECT 1 FROM pg_database WHERE datname = 'testdb')" + ) + == "f" + ), "database dropped on standby" + _check_slots_dropped(ctx, "drop_db", ctx.handle) + assert ( + ctx.standby.slot("otherslot")["slot_type"] == "logical" + ), "otherslot on standby not dropped" + ctx.standby.psql_capture("SELECT pg_drop_replication_slot('otherslot')") + + +def _setup_promotion(ctx, create_pg, backup_name): + """Recreate testdb, build a cascading standby, and activate promotion slots. + + Returns the cascading-standby recvlogical handle. + """ + ctx.standby.reload() + ctx.primary.psql_capture("CREATE DATABASE testdb") + ctx.primary.safe_psql( + "CREATE TABLE decoding_test(x integer, y text);", dbname="testdb" + ) + ctx.primary.wait_for_replay_catchup(ctx.standby) + ctx.standby.safe_psql( + "SELECT * FROM pg_create_physical_replication_slot('{}');".format( + _STANDBY_PHYSICAL_SLOTNAME + ), + dbname="testdb", + ) + ctx.standby.backup(backup_name) + cascading = create_pg( + "cascading_standby", + from_backup=(ctx.standby, backup_name), + has_streaming=True, + has_restoring=True, + start=False, + ) + ctx.cascading_standby = cascading + cascading.append_conf( + "primary_slot_name = '{}'\nhot_standby_feedback = on".format( + _STANDBY_PHYSICAL_SLOTNAME + ) + ) + cascading.start() + _create_logical_slots(ctx, ctx.standby, "promotion_") + ctx.standby.wait_for_replay_catchup(cascading, ctx.primary) + _create_logical_slots(ctx, cascading, "promotion_") + ctx.handle = _make_slot_active(ctx, ctx.standby, "promotion_", True) + cascading_handle = _make_slot_active(ctx, cascading, "promotion_", True) + return cascading_handle + + +def _test_promotion_scenario(ctx, create_pg, 
backup_name): + """Promote the standby and verify decoding of pre/post-promotion rows.""" + cascading_handle = _setup_promotion(ctx, create_pg, backup_name) + ctx.primary.safe_psql( + "INSERT INTO decoding_test(x,y) SELECT s, s::text " + "FROM generate_series(1,4) s;", + dbname="testdb", + ) + ctx.primary.wait_for_replay_catchup(ctx.standby) + ctx.standby.wait_for_replay_catchup(ctx.cascading_standby, ctx.primary) + ctx.standby.promote() + ctx.standby.safe_psql( + "INSERT INTO decoding_test(x,y) SELECT s, s::text " + "FROM generate_series(5,7) s;", + dbname="testdb", + ) + ctx.standby.wait_for_replay_catchup(ctx.cascading_standby) + stdout_sql = ctx.standby.safe_psql( + "SELECT data FROM pg_logical_slot_peek_changes('promotion_inactiveslot', " + "NULL, NULL, 'include-xids', '0', 'skip-empty-xacts', '1');", + dbname="testdb", + ) + assert ( + stdout_sql == _EXPECTED_PROMOTION + ), "got expected output from SQL decoding session on promoted standby" + ctx.handle.pump_until(r"^.*COMMIT.*COMMIT$") + assert ( + ctx.handle.stdout.rstrip("\n") == _EXPECTED_PROMOTION + ), "got same expected output from pg_recvlogical decoding session" + stdout_sql = ctx.cascading_standby.safe_psql( + "SELECT data FROM pg_logical_slot_peek_changes('promotion_inactiveslot', " + "NULL, NULL, 'include-xids', '0', 'skip-empty-xacts', '1');", + dbname="testdb", + ) + assert ( + stdout_sql == _EXPECTED_PROMOTION + ), "got expected output from SQL decoding session on cascading standby" + cascading_handle.pump_until(r"^.*COMMIT.*COMMIT$") + assert ( + cascading_handle.stdout.rstrip("\n") == _EXPECTED_PROMOTION + ), "got same expected output from pg_recvlogical on cascading standby" + + +def test_035_standby_logical_decoding(create_pg, bindir): + """Logical decoding on a standby and recovery-conflict invalidation.""" + if os.environ.get("enable_injection_points") != "yes": + pytest.skip("Injection points not supported by this build") + ctx = _Nodes(bindir, pypg.test_timeout_default()) + try: + 
backup_name = _init_primary(ctx, create_pg) + _init_standby(ctx, create_pg, backup_name) + ctx.subscriber = create_pg("subscriber") + _test_hot_standby_off_refusal(ctx) + _create_logical_slots(ctx, ctx.standby, "behaves_ok_") + _test_basic_decoding(ctx) + _test_subscribe_on_standby(ctx) + ctx.primary.safe_psql("CREATE EXTENSION injection_points;", dbname="testdb") + _test_vacuum_full_scenario(ctx) + _test_invalidation_survives_restart_and_no_wal(ctx) + _test_row_removal_scenario(ctx) + _test_shared_row_removal_scenario(ctx) + _test_no_conflict_scenario(ctx) + _test_pruning_scenario(ctx) + _test_wal_level_scenario(ctx) + _test_drop_database_scenario(ctx) + _test_promotion_scenario(ctx, create_pg, backup_name) + finally: + # The still-streaming promotion pg_recvlogical clients never exit on + # their own; reap every background handle so nothing is left running. + for handle in ctx.handles: + handle.terminate() diff --git a/src/test/recovery/pyt/test_036_truncated_dropped.py b/src/test/recovery/pyt/test_036_truncated_dropped.py new file mode 100644 index 0000000000000..ab85d206327cc --- /dev/null +++ b/src/test/recovery/pyt/test_036_truncated_dropped.py @@ -0,0 +1,72 @@ +# Copyright (c) 2021-2026, PostgreSQL Global Development Group + +"""Port of src/test/recovery/t/036_truncated_dropped.pl. + +Recovery where files are shorter than usual: replaying WAL for a relation that +was subsequently truncated or dropped. +""" + +_MAKE = ( + "CREATE TABLE truncme(i int) WITH (fillfactor = 50);\n" + "INSERT INTO truncme SELECT generate_series(1, 1000);\n" + "UPDATE truncme SET i = 1;\n" +) + + +def _crash_recover(node): + node.stop("immediate") + node.start() + + +def test_truncated_dropped(create_pg): + """PRUNE/TRUNCATE/DROP WAL replays cleanly after an immediate crash.""" + node = create_pg("n1", start=False) + # Disable autovacuum so VACUUM deterministically prunes/truncates. 
+ node.append_conf("wal_level = 'replica'\nautovacuum = off") + node.start() + + # PRUNE records for a pre-existing, then dropped, relation. + node.safe_psql(_MAKE + "CHECKPOINT;\nVACUUM truncme;\nDROP TABLE truncme;\n") + _crash_recover(node) + + # PRUNE records for a newly created, then dropped, relation. + node.safe_psql(_MAKE + "VACUUM truncme;\nDROP TABLE truncme;\n") + _crash_recover(node) + + # PRUNE records affecting a truncated block, with FPIs. + node.safe_psql( + _MAKE + "CHECKPOINT;\nVACUUM truncme;\nTRUNCATE truncme;\n" + "INSERT INTO truncme SELECT generate_series(1, 10);\n" + ) + _crash_recover(node) + assert ( + node.safe_psql("select count(*), sum(i) FROM truncme") == "10|55" + ), "table contents as expected after recovery" + node.safe_psql("DROP TABLE truncme") + + # PRUNE records for blocks later truncated, without FPIs. + node.safe_psql( + _MAKE + "VACUUM truncme;\nTRUNCATE truncme;\n" + "INSERT INTO truncme SELECT generate_series(1, 10);\n" + ) + _crash_recover(node) + assert ( + node.safe_psql("select count(*), sum(i) FROM truncme") == "10|55" + ), "table contents as expected after recovery" + node.safe_psql("DROP TABLE truncme") + + # Partial truncation via VACUUM. 
+ node.safe_psql( + "CREATE TABLE truncme(i int) WITH (fillfactor = 50);\n" + "INSERT INTO truncme SELECT generate_series(1, 1000);\n" + "UPDATE truncme SET i = i + 1;\n" + "DELETE FROM truncme WHERE i > 500;\n" + "VACUUM truncme;\n" + "INSERT INTO truncme SELECT generate_series(1000, 1010);\n" + ) + _crash_recover(node) + assert ( + node.safe_psql("select count(*), sum(i), min(i), max(i) FROM truncme") + == "510|136304|2|1010" + ), "table contents as expected after recovery" + node.safe_psql("DROP TABLE truncme") diff --git a/src/test/recovery/pyt/test_037_invalid_database.py b/src/test/recovery/pyt/test_037_invalid_database.py new file mode 100644 index 0000000000000..02571e9b08085 --- /dev/null +++ b/src/test/recovery/pyt/test_037_invalid_database.py @@ -0,0 +1,105 @@ +# Copyright (c) 2023-2026, PostgreSQL Global Development Group + +"""Port of src/test/recovery/t/037_invalid_database.pl. + +Handling of interrupted DROP DATABASE and access to invalid databases. +""" + +import re + +_CANCEL_DO = """\ +DO $$ +BEGIN + WHILE NOT EXISTS(SELECT * FROM pg_locks WHERE NOT granted AND relation = 'pg_tablespace'::regclass AND mode = 'AccessShareLock') LOOP + PERFORM pg_sleep(.1); + END LOOP; +END$$; +SELECT pg_cancel_backend({pid});""" + + +def _mark_invalid_checks(node): + node.safe_psql( + "CREATE DATABASE regression_invalid;\n" + "UPDATE pg_database SET datconnlimit = -2 " + "WHERE datname = 'regression_invalid';" + ) + + result = node.psql_capture("", dbname="regression_invalid") + assert result.rc == 2, "can't connect to invalid database - error code" + assert re.search( + r'FATAL:\s+cannot connect to invalid database "regression_invalid"', + result.stderr, + ), "can't connect to invalid database - error message" + + assert ( + node.psql_capture("ALTER DATABASE regression_invalid CONNECTION LIMIT 10").rc + == 2 + ), "can't ALTER invalid database" + assert ( + node.psql_capture("CREATE DATABASE copy_invalid TEMPLATE regression_invalid").rc + == 3 + ), "can't use 
invalid database as template" + + # VACUUM must ignore an invalid database when truncating the clog. + result = node.psql_capture( + "UPDATE pg_database SET datfrozenxid = '123456' " + "WHERE datname = 'regression_invalid';\n" + "DROP TABLE IF EXISTS foo_tbl; CREATE TABLE foo_tbl();\n" + "VACUUM FREEZE;" + ) + assert not re.search( + r"some databases have not been vacuumed in over 2 billion transactions", + result.stderr, + ), "invalid databases are ignored by vac_truncate_clog" + + assert ( + node.psql_capture("DROP DATABASE regression_invalid").rc == 0 + ), "can DROP invalid database" + assert ( + node.psql_capture("DROP DATABASE regression_invalid").rc == 3 + ), "can't drop already dropped database" + + +def test_invalid_database(create_pg): + """Invalid databases reject connections/ALTER; interrupted DROP is handled.""" + node = create_pg("node", start=False) + node.append_conf( + "autovacuum = off\n" + "max_prepared_transactions=5\n" + "log_min_duration_statement=0\n" + "log_connections=receipt\n" + "log_disconnections=on" + ) + node.start() + + _mark_invalid_checks(node) + + # Interrupt DROP DATABASE while it waits on a lock held by a 2PC xact. + cancel = node.background_psql(on_error_stop=True) + bgpsql = node.background_psql(on_error_stop=False) + pid = bgpsql.query("SELECT pg_backend_pid()").strip() + + bgpsql.query( + "CREATE DATABASE regression_invalid_interrupt;\n" + "BEGIN;\n" + "LOCK pg_tablespace;\n" + "PREPARE TRANSACTION 'lock_tblspc';" + ) + # Fire the DROP; it blocks on the still-held lock. + bgpsql.query_until(r"", "DROP DATABASE regression_invalid_interrupt;\n") + + # Wait until the DROP is blocked, then cancel it. 
+ cancel.query(_CANCEL_DO.format(pid=pid)) + cancel.quit() + + bgpsql.wait_for_stderr(r"canceling statement due to user request") + bgpsql.clear() + + assert ( + node.psql_capture("", dbname="regression_invalid_interrupt").rc == 2 + ), "can't connect to invalid_interrupt database" + + # Release the lock and drop the database for real. + bgpsql.query("ROLLBACK PREPARED 'lock_tblspc'") + bgpsql.query("DROP DATABASE regression_invalid_interrupt") + bgpsql.quit() diff --git a/src/test/recovery/pyt/test_038_save_logical_slots_shutdown.py b/src/test/recovery/pyt/test_038_save_logical_slots_shutdown.py new file mode 100644 index 0000000000000..09a9fc3a4fb5a --- /dev/null +++ b/src/test/recovery/pyt/test_038_save_logical_slots_shutdown.py @@ -0,0 +1,77 @@ +# Copyright (c) 2023-2026, PostgreSQL Global Development Group + +"""Port of src/test/recovery/t/038_save_logical_slots_shutdown.pl. + +Logical replication slots are always flushed to disk during a shutdown +checkpoint: the slot's confirmed_flush LSN must equal the latest checkpoint +location after a restart. +""" + +import re + +from pypg import slurp_file + +_STREAMING = ( + r"Streaming transactions committing after ([A-F0-9]+/[A-F0-9]+), " + r"reading WAL from ([A-F0-9]+/[A-F0-9]+)\." +) + + +def _advance_wal(node, num): + # pg_switch_wal() forces a WAL flush, making the non-transactional + # pg_logical_emit_message() safe to use. 
+ for _ in range(num): + node.safe_psql( + "SELECT pg_logical_emit_message(false, '', 'foo');\n" + "SELECT pg_switch_wal();" + ) + + +def _latest_checkpoint(pg_bin, node): + result = pg_bin.result(["pg_controldata", str(node.datadir)]) + match = re.search( + r"^Latest checkpoint location:\s*(.*)$", result.stdout, re.MULTILINE + ) + assert match, "Latest checkpoint location not found in control file" + return match.group(1).strip() + + +def test_save_logical_slots_shutdown(pg_bin, create_pg): + """A logical slot's confirmed_flush LSN matches the shutdown checkpoint.""" + publisher = create_pg("pub", allows_streaming="logical", start=False) + # Avoid stray checkpoints so the latest checkpoint location stays put. + publisher.append_conf("checkpoint_timeout = 1h\nautovacuum = off") + publisher.start() + subscriber = create_pg("sub") + + publisher.safe_psql("CREATE TABLE test_tbl (id int)") + subscriber.safe_psql("CREATE TABLE test_tbl (id int)") + + # Advance the WAL segment so the shutdown checkpoint record (from the + # restart below) doesn't land on a new page, which would desync + # confirmed_flush_lsn from the checkpoint location. + _advance_wal(publisher, 1) + publisher.safe_psql("INSERT INTO test_tbl VALUES (generate_series(1, 5));") + + connstr = publisher.connstr() + " dbname=postgres" + publisher.safe_psql("CREATE PUBLICATION pub FOR ALL TABLES") + subscriber.safe_psql( + "CREATE SUBSCRIPTION sub CONNECTION '{}' PUBLICATION pub".format(connstr) + ) + subscriber.wait_for_subscription_sync(publisher, "sub") + assert ( + subscriber.safe_psql("SELECT count(*) FROM test_tbl") == "5" + ), "check initial copy was done" + + offset = publisher.current_log_position() + # Restart to ensure the slot is flushed if required. 
+ publisher.restart() + + publisher.wait_for_log(_STREAMING, offset) + match = re.search(_STREAMING, slurp_file(publisher.log, offset)) + assert match, "could not get confirmed_flush_lsn" + confirmed_flush = match.group(1) + + assert ( + _latest_checkpoint(pg_bin, publisher) == confirmed_flush + ), "slot's confirmed_flush LSN equals the latest checkpoint location" diff --git a/src/test/recovery/pyt/test_039_end_of_wal.py b/src/test/recovery/pyt/test_039_end_of_wal.py new file mode 100644 index 0000000000000..adfeb1c5305b3 --- /dev/null +++ b/src/test/recovery/pyt/test_039_end_of_wal.py @@ -0,0 +1,231 @@ +# Copyright (c) 2023-2026, PostgreSQL Global Development Group + +"""Port of src/test/recovery/t/039_end_of_wal.pl. + +End-of-WAL detection: by writing crafted bytes at the WAL insert point (a record +header with a zero/short/over-long length, a bad prev-link, a bad CRC, or a page +header with a bad magic/pageaddr/info/contrecord length) and crash-restarting, +recovery must stop at end-of-WAL with the exact diagnostic for each malformation, +covering both single-page records and records whose header spans a page boundary. 
+""" + +import struct + +import pypg + +_BIG_ENDIAN = struct.pack("L", 0x12345678) == struct.pack(">L", 0x12345678) + + +def _record_header(xl_tot_len, xl_xid=0, xl_prev=0, xl_info=0, xl_rmid=0, xl_crc=0): + """Build an XLogRecord header (mirrors build_record_header).""" + hi, lo = (xl_prev, 0) if _BIG_ENDIAN else (0, xl_prev) + return struct.pack( + " $2; + END; + $$ LANGUAGE plpgsql immutable; + +CREATE OPERATOR myschema.= ( + leftarg = bigint, + rightarg = int, + procedure = myschema.myintne); + +ALTER DATABASE slotsync_test_db SET SEARCH_PATH TO myschema,pg_catalog; +GRANT USAGE on SCHEMA myschema TO repl_role; +""" + + +def _test_malicious_user(primary, standby1, connstr_1): + """Slot sync is protected from a malicious user who shadows the '=' operator + used in slot sync's validation query.""" + primary.psql_capture("CREATE DATABASE slotsync_test_db") + primary.wait_for_replay_catchup(standby1) + standby1.stop() + primary.safe_psql(_MALICIOUS_SETUP, dbname="slotsync_test_db") + + standby1.append_conf( + "primary_conninfo = '{} dbname=slotsync_test_db " + "user=repl_role'".format(connstr_1) + ) + standby1.start() + # If sync did not handle the attack it would fail validating the + # primary_slot_name. 
+ standby1.safe_psql("SELECT pg_sync_replication_slots();", dbname="slotsync_test_db") + + standby1.append_conf("primary_conninfo = '{} dbname=postgres'".format(connstr_1)) + standby1.reload() + primary.psql_capture("DROP DATABASE slotsync_test_db;") + + +def _test_slot_sync_worker_guc(standby1): + """The slot sync worker starts, exits on an invalid GUC, and restarts on a + valid GUC.""" + log_offset = standby1.current_log_position() + standby1.append_conf("sync_replication_slots = on") + standby1.reload() + standby1.wait_for_log(r"slot sync worker started", log_offset) + + log_offset = standby1.current_log_position() + standby1.append_conf("hot_standby_feedback = off") + standby1.reload() + standby1.wait_for_log( + r"slot synchronization worker will restart because of a parameter change", + log_offset, + ) + standby1.wait_for_log( + r'slot synchronization requires "hot_standby_feedback" to be enabled', + log_offset, + ) + + log_offset = standby1.current_log_position() + standby1.append_conf("hot_standby_feedback = on") + standby1.reload() + standby1.wait_for_log(r"slot sync worker started", log_offset) + + +def _test_worker_syncs_flush_lsn(primary, standby1, subscriber1, connstr): + """The slot sync worker syncs confirmed_flush_lsn of the logical slot.""" + primary.safe_psql( + "CREATE TABLE tab_int (a int PRIMARY KEY);\n" + "INSERT INTO tab_int SELECT generate_series(1, 10);" + ) + subscriber1.safe_psql( + "CREATE TABLE tab_int (a int PRIMARY KEY);\n" + "CREATE SUBSCRIPTION {sub} CONNECTION '{conn}' PUBLICATION {pub} " + "WITH (slot_name = {slot}, failover = true, create_slot = false);".format( + sub=SUB1, conn=connstr, pub=PUB, slot=SLOT1 + ) + ) + subscriber1.wait_for_subscription_sync() + + subscriber1.safe_psql("ALTER SUBSCRIPTION {} DISABLE".format(SUB1)) + assert primary.poll_query_until( + "SELECT COUNT(*) FROM pg_catalog.pg_replication_slots " + "WHERE slot_name = '{}' AND active='f'".format(SLOT1), + expected="1", + ) + primary_flush_lsn = 
_slot_field(primary, SLOT1, "confirmed_flush_lsn") + _wait_for_synced_flush_lsn( + standby1, + SLOT1, + primary_flush_lsn, + "confirmed_flush_lsn of slot lsub1_slot synced to standby", + ) + + +def _setup_standby2_and_sub2(create_pg, primary, standby1, subscriber1, connstr): + """Create standby2 (sb2_slot), subscriber2 (failover=false), enable sub1, + and configure synchronized_standby_slots='sb1_slot'. Returns + (standby2, subscriber2).""" + backup_name = "backup3" + primary.psql_capture("SELECT pg_create_physical_replication_slot('sb2_slot');") + primary.backup(backup_name) + + standby2 = create_pg( + "standby2", + from_backup=(primary, backup_name), + has_streaming=True, + has_restoring=True, + start=False, + ) + standby2.append_conf("primary_slot_name = 'sb2_slot'") + standby2.start() + primary.wait_for_replay_catchup(standby2) + + primary.append_conf("synchronized_standby_slots = 'sb1_slot'") + primary.reload() + + subscriber2 = create_pg("subscriber2", start=False) + subscriber2.start() + subscriber2.safe_psql( + "CREATE TABLE tab_int (a int PRIMARY KEY);\n" + "CREATE SUBSCRIPTION {sub} CONNECTION '{conn}' PUBLICATION {pub} " + "WITH (slot_name = {slot});".format(sub=SUB2, conn=connstr, pub=PUB, slot=SLOT2) + ) + subscriber2.wait_for_subscription_sync() + subscriber1.safe_psql("ALTER SUBSCRIPTION {} ENABLE".format(SUB1)) + return standby2, subscriber2 + + +def _test_failover_waits_for_standby(primary, standby1, standby2, subs): + """A failover logical slot waits for the physical slot named in + synchronized_standby_slots to catch up before the subscriber gets data.""" + subscriber1, subscriber2 = subs + offset = primary.current_log_position() + standby1.stop() + + row_count = 20 + primary.safe_psql( + "INSERT INTO tab_int SELECT generate_series(11, {});".format(row_count) + ) + primary.wait_for_replay_catchup(standby2) + assert ( + standby2.safe_psql("SELECT count(*) = {} FROM tab_int;".format(row_count)) + == "t" + ), "standby2 gets data from primary" + + 
primary.wait_for_catchup(SUB2) + assert ( + subscriber2.safe_psql("SELECT count(*) = {} FROM tab_int;".format(row_count)) + == "t" + ), "subscriber2 gets data from primary" + + primary.wait_for_log( + r'replication slot "sb1_slot" specified in parameter ' + r'"synchronized_standby_slots" does not have active_pid', + offset, + ) + assert ( + subscriber1.safe_psql("SELECT count(*) <> {} FROM tab_int;".format(row_count)) + == "t" + ), ( + "subscriber1 doesn't get data from primary until standby1 acknowledges " + "changes" + ) + + standby1.start() + primary.wait_for_replay_catchup(standby1) + assert ( + standby1.safe_psql("SELECT count(*) = {} FROM tab_int;".format(row_count)) + == "t" + ), "standby1 gets data from primary" + + primary.wait_for_catchup(SUB1) + assert ( + subscriber1.safe_psql("SELECT count(*) = {} FROM tab_int;".format(row_count)) + == "t" + ), "subscriber1 gets data from primary after standby1 acknowledges changes" + + +def _test_get_changes_waits(primary, standby1, subscriber1): + """pg_logical_slot_get_changes on a failover slot also waits for the slots + in synchronized_standby_slots; removing the slot lets it return.""" + primary.safe_psql("TRUNCATE tab_int;") + primary.wait_for_catchup(SUB1) + standby1.stop() + + subscriber1.safe_psql("ALTER SUBSCRIPTION {} DISABLE".format(SUB1)) + assert primary.poll_query_until( + "SELECT COUNT(*) FROM pg_catalog.pg_replication_slots " + "WHERE slot_name = '{}' AND active = 'f'".format(SLOT1), + expected="1", + ) + primary.safe_psql( + "SELECT pg_create_logical_replication_slot('test_slot', " + "'test_decoding', false, false, true);" + ) + + back_q = primary.background_psql("postgres", on_error_stop=False) + offset = primary.current_log_position() + back_q.query_until( + r"logical_slot_get_changes", + "\\echo logical_slot_get_changes\n" + "SELECT pg_logical_slot_get_changes('test_slot', NULL, NULL);\n", + ) + primary.wait_for_log( + r'replication slot "sb1_slot" specified in parameter ' + 
r'"synchronized_standby_slots" does not have active_pid', + offset, + ) + primary.adjust_conf("synchronized_standby_slots", "''") + primary.reload() + back_q.quit() + + primary.safe_psql("SELECT pg_drop_replication_slot('test_slot');") + primary.adjust_conf("synchronized_standby_slots", "'sb1_slot'") + primary.reload() + subscriber1.safe_psql("ALTER SUBSCRIPTION {} ENABLE".format(SUB1)) + + +def _test_inactive_physical_slot_waits(primary, subscriber1): + """Logical replication waits for the inactive user-created physical slot in + synchronized_standby_slots until it is removed from the list.""" + offset = primary.current_log_position() + row_count = 10 + primary.safe_psql( + "INSERT INTO tab_int SELECT generate_series(1, {});".format(row_count) + ) + primary.wait_for_log( + r'replication slot "sb1_slot" specified in parameter ' + r'"synchronized_standby_slots" does not have active_pid', + offset, + ) + assert ( + subscriber1.safe_psql("SELECT count(*) = 0 FROM tab_int;") == "t" + ), "subscriber1 doesn't get data as the sb1_slot doesn't catch up" + + primary.adjust_conf("synchronized_standby_slots", "''") + primary.reload() + primary.wait_for_catchup(SUB1) + assert ( + subscriber1.safe_psql("SELECT count(*) = {} FROM tab_int;".format(row_count)) + == "t" + ), ( + "subscriber1 gets data from primary after standby1 is removed from the " + "synchronized_standby_slots list" + ) + primary.adjust_conf("synchronized_standby_slots", "'sb1_slot'") + primary.reload() + + +def _test_two_phase_sync(primary, standby1, subscriber1): + """The two_phase setting syncs to the standby; a transaction prepared before + two_phase was enabled is not yet replicated to the subscriber.""" + standby1.start() + primary.safe_psql( + "BEGIN;\n" + "INSERT INTO tab_int values(0);\n" + "PREPARE TRANSACTION 'test_twophase_slotsync';" + ) + primary.wait_for_replay_catchup(standby1) + primary.wait_for_catchup(SUB1) + + subscriber1.safe_psql("ALTER SUBSCRIPTION {} DISABLE".format(SUB1)) + assert 
primary.poll_query_until( + "SELECT COUNT(*) FROM pg_catalog.pg_replication_slots " + "WHERE slot_name = '{}' AND active='f'".format(SLOT1), + expected="1", + ) + subscriber1.safe_psql( + "ALTER SUBSCRIPTION {sub} SET (two_phase = true);\n" + "ALTER SUBSCRIPTION {sub} ENABLE;".format(sub=SUB1) + ) + primary.wait_for_catchup(SUB1) + + two_phase_at = _slot_field(primary, SLOT1, "two_phase_at") + assert standby1.poll_query_until( + "SELECT two_phase AND '{at}' = two_phase_at FROM pg_replication_slots " + "WHERE slot_name = '{slot}' AND synced AND NOT temporary;".format( + at=two_phase_at, slot=SLOT1 + ) + ), "two_phase setting of slot lsub1_slot synced to standby" + assert ( + subscriber1.safe_psql("SELECT count(*) = 0 FROM pg_prepared_xacts;") == "t" + ), "the prepared transaction is not replicated to the subscriber" + + +def _test_promotion(primary, standby1, subscriber1, inactive_since_on_primary): + """Promote standby1 to primary: synced slots are retained, logical + replication resumes, the prepared txn commits and replicates, and the synced + snap_test_slot can be consumed.""" + primary.wait_for_replay_catchup(standby1) + promotion_time = standby1.safe_psql("SELECT current_timestamp;") + standby1.promote() + + # Capture inactive_since before the slot is enabled on the new primary. 
+ inactive_since_on_new_primary = standby1.validate_slot_inactive_since( + SLOT1, promotion_time + ) + assert ( + standby1.safe_psql( + "SELECT '{}'::timestamptz > '{}'::timestamptz".format( + inactive_since_on_new_primary, inactive_since_on_primary + ) + ) + == "t" + ), ( + "synchronized slot has got its own inactive_since on the new primary " + "after promotion" + ) + + subscriber1.safe_psql( + "ALTER SUBSCRIPTION {} CONNECTION '{} dbname=postgres';".format( + SUB1, standby1.connstr() + ) + ) + assert ( + standby1.safe_psql( + "SELECT count(*) = 2 FROM pg_replication_slots WHERE slot_name IN " + "('{}', 'snap_test_slot') AND synced AND NOT temporary;".format(SLOT1) + ) + == "t" + ), "synced slot retained on the new primary" + + standby1.safe_psql("COMMIT PREPARED 'test_twophase_slotsync';") + standby1.wait_for_catchup(SUB1) + assert ( + subscriber1.safe_psql("SELECT count(*) FROM tab_int;") == "11" + ), "prepared data replicated from the new primary" + + standby1.safe_psql("INSERT INTO tab_int SELECT generate_series(11, 20);") + standby1.wait_for_catchup(SUB1) + assert ( + subscriber1.safe_psql("SELECT count(*) FROM tab_int;") == "21" + ), "data replicated from the new primary" + + assert ( + standby1.safe_psql( + "SELECT count(*) FROM pg_logical_slot_get_changes('snap_test_slot', " + "NULL, NULL) WHERE data ~ 'message*';" + ) + == "1" + ), "data can be consumed using snap_test_slot" + + +def _cleanup_after_promotion(primary, standby2, subscriber1, subscriber2): + """Drop slots/subscriptions and clear the prepared transaction on the + original primary so the environment is clean for the skip-retry test.""" + primary.psql_capture( + "SELECT pg_drop_replication_slot('sb1_slot');\n" + "SELECT pg_drop_replication_slot('{}');\n" + "SELECT pg_drop_replication_slot('snap_test_slot');".format(SLOT1) + ) + subscriber2.safe_psql("DROP SUBSCRIPTION {};".format(SUB2)) + subscriber1.safe_psql("DROP SUBSCRIPTION {};".format(SUB1)) + subscriber1.safe_psql("TRUNCATE tab_int;") + 
+ primary.adjust_conf("synchronized_standby_slots", "''") + primary.reload() + assert ( + primary.safe_psql( + "SELECT count(*) = 0 FROM pg_replication_slots " + "WHERE slot_name != 'sb2_slot';" + ) + == "t" + ), ( + "all replication slots have been dropped except the physical slot used " + "by standby2" + ) + + primary.safe_psql("COMMIT PREPARED 'test_twophase_slotsync';") + primary.wait_for_replay_catchup(standby2) + + +def _test_skip_and_retry(primary, standby2, subscriber1, connstr, connstr_1): + """pg_sync_replication_slots() on the standby skips and retries until the + slot is sync-ready; the slotsync skip reason and skip count are updated. + + This runs against the original primary (publisher) and its physical standby + standby2 (which still streams from the original primary via sb2_slot). The + promotion of standby1 did not demote the original primary, so it continues + to act as a publisher here.""" + subscriber1.safe_psql( + "CREATE TABLE push_wal (a int);\n" + "CREATE SUBSCRIPTION {sub} CONNECTION '{conn}' PUBLICATION {pub} " + "WITH (slot_name = {slot}, failover = true, enabled = false);".format( + sub=SUB1, conn=connstr, pub=PUB, slot=SLOT1 + ) + ) + primary.safe_psql("CREATE TABLE push_wal (a int);") + primary.wait_for_replay_catchup(standby2) + + log_offset = standby2.current_log_position() + standby2.append_conf( + "hot_standby_feedback = on\n" + "primary_conninfo = '{} dbname=postgres'\n" + "log_min_messages = 'debug2'".format(connstr_1) + ) + standby2.reload() + + handle = standby2.background_psql("postgres", on_error_stop=False) + handle.query_until(r"start", "\\echo start\nSELECT pg_sync_replication_slots();\n") + standby2.wait_for_log( + r'could not synchronize replication slot "lsub1_slot"', log_offset + ) + + assert ( + _slot_field(standby2, SLOT1, "slotsync_skip_reason") == "wal_or_rows_removed" + ), "check slot sync skip reason" + assert ( + standby2.safe_psql( + "SELECT slotsync_skip_count > 0 FROM pg_stat_replication_slots " + "WHERE 
slot_name = '{}'".format(SLOT1) + ) + == "t" + ), "check slot sync skip count increments" + + primary.append_conf("synchronized_standby_slots = 'sb2_slot'") + primary.reload() + subscriber1.safe_psql("ALTER SUBSCRIPTION {} ENABLE".format(SUB1)) + subscriber1.wait_for_subscription_sync() + primary.safe_psql("SELECT pg_log_standby_snapshot();") + standby2.wait_for_log( + r'newly created replication slot "lsub1_slot" is sync-ready now', + log_offset, + ) + handle.quit() + + +def test_040_standby_failover_slots_sync(create_pg): + """Failover slot synchronization from a publisher to its physical standby.""" + publisher, subscriber1 = _setup_publisher_subscriber(create_pg) + connstr = publisher.connstr("postgres") + + slot_creation_time = _test_failover_flag_toggling(publisher, subscriber1, connstr) + + primary = publisher + connstr_1 = primary.connstr() + standby1 = _create_standby(create_pg, primary, "standby1", "sb1_slot") + + inactive_since_on_primary = _test_sync_two_plugins( + primary, standby1, subscriber1, slot_creation_time + ) + _test_drop_synced_slot(primary, standby1) + _test_invalidate_and_resync(primary, standby1, publisher) + _test_synced_slot_immutable(standby1) + _test_dbname_required(standby1, connstr_1) + _test_cascading_standby(create_pg, standby1) + _test_snapshot_consistency(primary, standby1) + _test_malicious_user(primary, standby1, connstr_1) + _test_slot_sync_worker_guc(standby1) + _test_worker_syncs_flush_lsn(primary, standby1, subscriber1, connstr) + + standby2, subscriber2 = _setup_standby2_and_sub2( + create_pg, primary, standby1, subscriber1, connstr + ) + _test_failover_waits_for_standby( + primary, standby1, standby2, (subscriber1, subscriber2) + ) + _test_get_changes_waits(primary, standby1, subscriber1) + _test_inactive_physical_slot_waits(primary, subscriber1) + _test_two_phase_sync(primary, standby1, subscriber1) + _test_promotion(primary, standby1, subscriber1, inactive_since_on_primary) + _cleanup_after_promotion(primary, standby2, 
subscriber1, subscriber2) + + # The skip-retry test runs against the original primary (publisher) and + # standby2; standby1 (now a separate promoted primary) is not involved. + _test_skip_and_retry(primary, standby2, subscriber1, connstr, connstr_1) + + standby2.stop() + standby1.stop() + subscriber1.stop() + subscriber2.stop() + publisher.stop() diff --git a/src/test/recovery/pyt/test_041_checkpoint_at_promote.py b/src/test/recovery/pyt/test_041_checkpoint_at_promote.py new file mode 100644 index 0000000000000..ceb711ba8f9ec --- /dev/null +++ b/src/test/recovery/pyt/test_041_checkpoint_at_promote.py @@ -0,0 +1,67 @@ +# Copyright (c) 2024-2026, PostgreSQL Global Development Group + +"""Port of src/test/recovery/t/041_checkpoint_at_promote.pl. + +A restartpoint in progress on a standby (paused via an injection point) must +complete after the standby is promoted, and the freshly-promoted primary must +survive a backend SIGKILL through crash recovery (restart_after_crash) and +accept new connections. Requires injection points. 
+""" + +import os + +import pytest + + +def test_041_checkpoint_at_promote(create_pg): + """An in-progress restartpoint completes post-promote; crash recovery works.""" + if os.environ.get("enable_injection_points") != "yes": + pytest.skip("Injection points not supported by this build") + primary = create_pg("master", allows_streaming=True, start=False) + primary.append_conf("\nlog_checkpoints = on\nrestart_after_crash = on\n") + primary.start() + if not primary.check_extension("injection_points"): + pytest.skip("Extension injection_points not installed") + backup_name = "my_backup" + primary.backup(backup_name) + standby = create_pg( + "standby1", from_backup=(primary, backup_name), has_streaming=True, start=False + ) + standby.start() + primary.safe_psql("checkpoint") + primary.safe_psql("CREATE TABLE prim_tab (a int);") + primary.safe_psql("CREATE EXTENSION injection_points;") + primary.wait_for_replay_catchup(standby) + standby.safe_psql("SELECT injection_points_attach('create-restart-point', 'wait');") + logstart = standby.current_log_position() + psql_session = standby.background_psql("postgres", on_error_stop=False) + psql_session.query_until( + r"starting_checkpoint", "\n \\echo starting_checkpoint\n CHECKPOINT;\n" + ) + primary.safe_psql("INSERT INTO prim_tab VALUES (1);") + primary.safe_psql("SELECT pg_switch_wal();") + primary.wait_for_replay_catchup(standby) + standby.wait_for_event("checkpointer", "create-restart-point") + assert standby.log_contains( + "restartpoint starting: fast wait", logstart + ), "restartpoint has started" + primary.stop() + standby.promote() + logstart = standby.current_log_position() + standby.safe_psql("SELECT injection_points_wakeup('create-restart-point');") + assert standby.wait_for_log( + r"restartpoint complete", logstart + ), "restart point has completed" + killme = standby.background_psql("postgres") + pid = killme.query("SELECT pg_backend_pid();").strip() + standby.signal_backend(int(pid), "KILL") + 
killme.wait_for_stderr( + r"server closed the connection unexpectedly|connection to server was lost" + r"|could not send data to server", + "SELECT 1;\n", + ) + killme.quit() + assert standby.poll_query_until("", ""), "server back up after crash recovery" + res = standby.psql_capture("select 1") + assert res.rc == 0, "psql connect success" + assert res.stdout == "1", "psql select 1" diff --git a/src/test/recovery/pyt/test_042_low_level_backup.py b/src/test/recovery/pyt/test_042_low_level_backup.py new file mode 100644 index 0000000000000..0be625a9cf83b --- /dev/null +++ b/src/test/recovery/pyt/test_042_low_level_backup.py @@ -0,0 +1,72 @@ +# Copyright (c) 2024-2026, PostgreSQL Global Development Group + +"""Port of src/test/recovery/t/042_low_level_backup.pl. + +The low-level base-backup API (pg_backup_start/pg_backup_stop) plus a manual +filesystem copy: without the backup_label the copy recovers as a crash (the +post-backup 'canary' table is absent), but with the backup_label appended it +performs proper backup recovery (the canary is present), as shown by the +distinct log messages each path emits. 
+""" + +import os +import shutil + + +def test_042_low_level_backup(create_pg): + """A low-level backup recovers as crash without, and properly with, label.""" + primary = create_pg("primary", has_archiving=True, allows_streaming=True) + backup_name = "backup1" + psql = primary.background_psql("postgres") + psql.query_safe("SET client_min_messages TO WARNING") + psql.set_query_timer_restart() + psql.query_safe("select pg_backup_start('test label')") + backup_dir = str(primary.backup_path(backup_name)) + shutil.copytree(primary.datadir, backup_dir, ignore_dangling_symlinks=True) + os.unlink("{}/postmaster.pid".format(backup_dir)) + os.unlink("{}/postmaster.opts".format(backup_dir)) + os.unlink("{}/global/pg_control".format(backup_dir)) + shutil.rmtree("{}/pg_wal".format(backup_dir)) + os.mkdir("{}/pg_wal".format(backup_dir)) + primary.safe_psql("create table canary (id int)") + segment_name = primary.safe_psql("select pg_walfile_name(pg_switch_wal())") + assert primary.poll_query_until( + "SELECT last_archived_wal FROM pg_stat_archiver", segment_name + ), "Timed out while waiting for archiving of switched segment to finish" + primary.safe_psql("checkpoint") + shutil.copy( + "{}/global/pg_control".format(primary.datadir), + "{}/global/pg_control".format(backup_dir), + ) + stop_segment_name = primary.safe_psql( + "SELECT pg_walfile_name(pg_current_wal_lsn())" + ) + backup_label = psql.query_safe("select labelfile from pg_backup_stop()") + psql.quit() + canary_query = "select count(*) from pg_class where relname = 'canary'" + replica = create_pg("replica_fail", from_backup=(primary, backup_name), start=False) + replica.append_conf("archive_mode = off") + shutil.copy( + "{}/{}".format(primary.archive_dir, stop_segment_name), + "{}/pg_wal/{}".format(replica.datadir, stop_segment_name), + ) + replica.start() + assert replica.safe_psql(canary_query) == "0", "canary is missing" + assert replica.log_contains( + "database system was not properly shut down; automatic recovery 
in progress" + ), "verify backup recovery performed with crash recovery" + replica.teardown_node() + replica.clean_node() + with open("{}/backup_label".format(backup_dir), "a", encoding="utf-8") as fh: + fh.write(backup_label) + replica = create_pg( + "replica_success", + from_backup=(primary, backup_name), + has_restoring=True, + start=False, + ) + replica.start() + assert replica.safe_psql(canary_query) == "1", "canary is present" + assert replica.log_contains( + "starting backup recovery with redo LSN" + ), "verify backup recovery performed with backup_label" diff --git a/src/test/recovery/pyt/test_043_no_contrecord_switch.py b/src/test/recovery/pyt/test_043_no_contrecord_switch.py new file mode 100644 index 0000000000000..67f84c17a2654 --- /dev/null +++ b/src/test/recovery/pyt/test_043_no_contrecord_switch.py @@ -0,0 +1,82 @@ +# Copyright (c) 2023-2026, PostgreSQL Global Development Group + +"""Port of src/test/recovery/t/043_no_contrecord_switch.pl. + +A WAL page whose header is zeroed (magic 0000) at a page boundary must stop +replay cleanly with an "invalid magic number" message rather than crashing. +Two standbys restoring from the archive both report the bad page; one is then +promoted, generates WAL on the new timeline, and the second standby streams from +it and catches up to the new content. 
+""" + +import shutil + + +def _wal_segment_name(tli, segment): + return "{:08X}{:08X}{:08X}".format(tli, 0, segment) + + +def _get_int_setting(node, name): + return int( + node.safe_psql("SELECT setting FROM pg_settings WHERE name = '{}'".format(name)) + ) + + +def test_043_no_contrecord_switch(create_pg): + """A zeroed WAL page boundary halts replay cleanly and allows promotion.""" + primary = create_pg( + "primary", allows_streaming=True, has_archiving=True, start=False + ) + primary.append_conf( + "\nautovacuum = off\ncheckpoint_timeout = '30min'\nwal_keep_size = 1GB\n" + ) + primary.start() + primary.backup("backup") + primary.safe_psql("CREATE TABLE t AS SELECT 0") + wal_segment_size = _get_int_setting(primary, "wal_segment_size") + wal_block_size = _get_int_setting(primary, "wal_block_size") + tli = int(primary.safe_psql("SELECT timeline_id FROM pg_control_checkpoint()")) + primary.emit_wal(0) + end_lsn = primary.advance_wal_out_of_record_splitting_zone(wal_block_size) + overflow_size = wal_block_size - (end_lsn % wal_block_size) + end_lsn = primary.emit_wal(overflow_size) + primary.stop("immediate") + start_page = end_lsn & ~(wal_block_size - 1) + wal_file = primary.write_wal( + tli, start_page, wal_segment_size, b"\x00" * wal_block_size + ) + shutil.copy(wal_file, str(primary.archive_dir)) + standby1 = create_pg( + "standby1", + from_backup=(primary, "backup"), + standby=True, + has_restoring=True, + start=False, + ) + standby2 = create_pg( + "standby2", + from_backup=(primary, "backup"), + standby=True, + has_restoring=True, + start=False, + ) + log_size1 = standby1.current_log_position() + log_size2 = standby2.current_log_position() + standby1.start() + standby2.start() + segment = start_page // wal_segment_size + offset = start_page % wal_segment_size + segment_name = _wal_segment_name(tli, segment) + pattern = r"invalid magic number 0000 .* segment {}.* offset {}".format( + segment_name, offset + ) + standby1.wait_for_log(pattern, log_size1) + 
standby2.wait_for_log(pattern, log_size2) + standby1.promote() + standby1.safe_psql("SELECT pg_switch_wal()") + standby1.safe_psql("INSERT INTO t SELECT * FROM generate_series(1, 1000)") + standby2.enable_streaming(standby1) + standby2.reload() + standby1.wait_for_replay_catchup(standby2) + result = standby2.safe_psql("SELECT count(*) FROM t") + assert result == "1001", "check streamed content on standby2" diff --git a/src/test/recovery/pyt/test_044_invalidate_inactive_slots.py b/src/test/recovery/pyt/test_044_invalidate_inactive_slots.py new file mode 100644 index 0000000000000..3231bbf355481 --- /dev/null +++ b/src/test/recovery/pyt/test_044_invalidate_inactive_slots.py @@ -0,0 +1,59 @@ +# Copyright (c) 2024-2026, PostgreSQL Global Development Group + +"""Port of src/test/recovery/t/044_invalidate_inactive_slots.pl. + +Idle replication slots are invalidated for idle_timeout: with an injection point +forcing the timeout check, a CHECKPOINT invalidates both a physical and a +logical idle slot (reason 'idle_timeout'), and acquiring the invalidated logical +slot afterward errors. Requires an injection-points build. 
+""" + +import os +import re + +import pytest + + +def _wait_for_slot_invalidation(node, slot_name, offset): + node.wait_for_log( + r'invalidating obsolete replication slot "{}"'.format(slot_name), offset + ) + assert node.poll_query_until( + "SELECT COUNT(slot_name) = 1 FROM pg_replication_slots\n" + " WHERE slot_name = '{}' AND\n" + " invalidation_reason = 'idle_timeout';".format(slot_name) + ), ( + "Timed out while waiting for invalidation reason of slot {} to be " + "set".format(slot_name) + ) + + +def test_044_invalidate_inactive_slots(create_pg): + """Idle physical and logical slots are invalidated on checkpoint.""" + if os.environ.get("enable_injection_points") != "yes": + pytest.skip("Injection points not supported by this build") + node = create_pg("node", allows_streaming="logical", start=False) + node.append_conf( + "\ncheckpoint_timeout = 1h\nidle_replication_slot_timeout = 1min\n" + ) + node.start() + if not node.check_extension("injection_points"): + pytest.skip("Extension injection_points not installed") + node.safe_psql( + "SELECT pg_create_physical_replication_slot(slot_name := 'physical_slot', " + "immediately_reserve := true);\n" + "SELECT pg_create_logical_replication_slot('logical_slot', " + "'test_decoding');" + ) + log_offset = node.current_log_position() + node.safe_psql("CREATE EXTENSION injection_points;") + node.safe_psql("SELECT injection_points_attach('slot-timeout-inval', 'error');") + node.safe_psql("CHECKPOINT") + _wait_for_slot_invalidation(node, "physical_slot", log_offset) + _wait_for_slot_invalidation(node, "logical_slot", log_offset) + res = node.psql_capture( + "SELECT pg_replication_slot_advance('logical_slot', '0/1');" + ) + assert re.search( + r'can no longer access replication slot "logical_slot"', res.stderr + ), "detected error upon trying to acquire invalidated slot on node" diff --git a/src/test/recovery/pyt/test_045_archive_restartpoint.py b/src/test/recovery/pyt/test_045_archive_restartpoint.py new file mode 100644 
index 0000000000000..4821ce48ccde2 --- /dev/null +++ b/src/test/recovery/pyt/test_045_archive_restartpoint.py @@ -0,0 +1,45 @@ +# Copyright (c) 2024-2026, PostgreSQL Global Development Group + +"""Port of src/test/recovery/t/045_archive_restartpoint.pl. + +Test restartpoints during archive recovery. +""" + +_ARCHIVE_MAX_MB = 320 +_WAL_SEGSIZE = 1 + + +def test_archive_restartpoint(create_pg): + """A restore replays past a recovery target across many WAL segments.""" + primary = create_pg( + "primary", + has_archiving=True, + allows_streaming=True, + extra=["--wal-segsize", str(_WAL_SEGSIZE)], + ) + backup_name = "my_backup" + primary.backup(backup_name) + + primary.safe_psql( + "DO $$BEGIN FOR i IN 1..{} LOOP CHECKPOINT; PERFORM pg_switch_wal(); " + "END LOOP; END$$;".format(_ARCHIVE_MAX_MB // _WAL_SEGSIZE) + ) + + # Force archiving of the WAL file containing the recovery target. + until_lsn = primary.lsn("write") + primary.safe_psql("SELECT pg_switch_wal()") + primary.stop() + + restore = create_pg( + "restore", from_backup=(primary, backup_name), has_restoring=True, start=False + ) + restore.append_conf("recovery_target_lsn = '{}'".format(until_lsn)) + restore.append_conf("recovery_target_action = pause") + restore.append_conf("max_wal_size = {}".format(2 * _WAL_SEGSIZE)) + restore.append_conf("log_checkpoints = on") + restore.start() + + assert restore.poll_query_until( + "SELECT '{}'::pg_lsn <= pg_last_wal_replay_lsn()".format(until_lsn) + ), "restore caught up" + restore.stop() diff --git a/src/test/recovery/pyt/test_046_checkpoint_logical_slot.py b/src/test/recovery/pyt/test_046_checkpoint_logical_slot.py new file mode 100644 index 0000000000000..c051236273b08 --- /dev/null +++ b/src/test/recovery/pyt/test_046_checkpoint_logical_slot.py @@ -0,0 +1,137 @@ +# Copyright (c) 2024-2026, PostgreSQL Global Development Group + +"""Port of src/test/recovery/t/046_checkpoint_logical_slot.pl. 
+ +A logical slot must remain valid when a checkpoint that advances WAL removal +races with a logical-decoding segment advance (injection points coordinate the +race), surviving an immediate crash and restart. Then a synced failover slot on +a standby must not be wrongly invalidated when a restartpoint races with slot +synchronization. Requires injection points. +""" + +import os + +import pytest + + +def test_046_checkpoint_logical_slot(create_pg): + """Logical and synced-failover slots stay valid across checkpoint races.""" + if os.environ.get("enable_injection_points") != "yes": + pytest.skip("Injection points not supported by this build") + node = create_pg("mike", allows_streaming="logical", start=False) + node.start() + if not node.check_extension("injection_points"): + pytest.skip("Extension injection_points not installed") + node.safe_psql("CREATE EXTENSION injection_points") + node.safe_psql( + "select pg_create_logical_replication_slot('slot_logical', 'test_decoding')" + ) + node.safe_psql("select pg_create_physical_replication_slot('slot_physical', true)") + node.safe_psql( + "select count(*) from pg_logical_slot_get_changes('slot_logical', null, null)" + ) + node.safe_psql( + "select pg_replication_slot_advance('slot_physical', pg_current_wal_lsn())" + ) + node.safe_psql("checkpoint") + xacts = node.background_psql("postgres") + xacts.query_until(r"run_xacts", "\\echo run_xacts\nSELECT 1 \\watch 0.1\n\\q\n") + node.advance_wal(20) + node.safe_psql("checkpoint") + node.advance_wal(20) + checkpoint = node.background_psql("postgres") + checkpoint.query_safe( + "select injection_points_attach('checkpoint-before-old-wal-removal','wait')" + ) + checkpoint.query_until( + r"starting_checkpoint", "\\echo starting_checkpoint\ncheckpoint;\n\\q\n" + ) + node.wait_for_event("checkpointer", "checkpoint-before-old-wal-removal") + logical = node.background_psql("postgres") + logical.query_safe( + "select injection_points_attach(" + 
"'logical-replication-slot-advance-segment','wait');" + ) + logical.query_until( + r"get_changes", + "\n\\echo get_changes\n" + "select count(*) from pg_logical_slot_get_changes('slot_logical', null, " + "null) \\watch 1\n\\q\n", + ) + node.wait_for_event("client backend", "logical-replication-slot-advance-segment") + node.safe_psql( + "select pg_replication_slot_advance('slot_physical', pg_current_wal_lsn())" + ) + node.safe_psql( + "select pg_logical_emit_message(false, '', repeat('123456789', 1000))" + ) + log_offset = node.current_log_position() + node.safe_psql( + "select injection_points_wakeup('checkpoint-before-old-wal-removal')" + ) + node.wait_for_log(r"checkpoint complete", log_offset) + node.stop("immediate") + node.start() + node.safe_psql( + "select count(*) from pg_logical_slot_get_changes('slot_logical', null, null);" + ) + xacts.quit() + checkpoint.quit() + logical.quit() + _failover_slot_phase(create_pg, node) + + +def _failover_slot_phase(create_pg, primary): + """A synced failover slot on a standby is not invalidated by a restartpoint.""" + primary.append_conf("autovacuum = off") + primary.reload() + backup_name = "backup" + primary.backup(backup_name) + standby = create_pg( + "standby", + from_backup=(primary, backup_name), + has_streaming=True, + start=False, + ) + connstr_1 = primary.connstr() + standby.append_conf( + "\nhot_standby_feedback = on\n" + "primary_slot_name = 'phys_slot'\n" + "primary_conninfo = '{} dbname=postgres'\n".format(connstr_1) + ) + primary.safe_psql( + "SELECT pg_create_logical_replication_slot(" + "'failover_slot', 'test_decoding', false, false, true);\n" + "SELECT pg_create_physical_replication_slot('phys_slot');" + ) + standby.start() + primary.advance_wal(1) + primary.safe_psql("CHECKPOINT") + primary.wait_for_replay_catchup(standby) + checkpoint = standby.background_psql("postgres") + checkpoint.query_safe( + "select injection_points_attach(" + "'restartpoint-before-slot-invalidation','wait')" + ) + 
checkpoint.query_until( + r"starting_checkpoint", "\\echo starting_checkpoint\ncheckpoint;\n" + ) + standby.wait_for_event("checkpointer", "restartpoint-before-slot-invalidation") + standby.append_conf("sync_replication_slots = on") + standby.reload() + standby.poll_query_until( + "SELECT COUNT(*) > 0 FROM pg_replication_slots " + "WHERE slot_name = 'failover_slot'" + ) + standby.safe_psql( + "select injection_points_wakeup('restartpoint-before-slot-invalidation');\n" + "select injection_points_detach('restartpoint-before-slot-invalidation')" + ) + checkpoint.quit() + assert ( + standby.safe_psql( + "SELECT invalidation_reason IS NULL AND synced FROM pg_replication_slots " + "WHERE slot_name = 'failover_slot';" + ) + == "t" + ), "logical slot is not invalidated" diff --git a/src/test/recovery/pyt/test_047_checkpoint_physical_slot.py b/src/test/recovery/pyt/test_047_checkpoint_physical_slot.py new file mode 100644 index 0000000000000..bd74e8824c4ba --- /dev/null +++ b/src/test/recovery/pyt/test_047_checkpoint_physical_slot.py @@ -0,0 +1,66 @@ +# Copyright (c) 2024-2026, PostgreSQL Global Development Group + +"""Port of src/test/recovery/t/047_checkpoint_physical_slot.pl. + +A physical replication slot's restart_lsn advanced during an in-progress +checkpoint (paused before old-WAL removal via an injection point) must still +reference a WAL segment that exists after an immediate crash and restart. +Requires injection points. 
+""" + +import os + +import pytest + + +def test_047_checkpoint_physical_slot(create_pg): + """The slot's required WAL segment survives a checkpoint-time advance + crash.""" + if os.environ.get("enable_injection_points") != "yes": + pytest.skip("Injection points not supported by this build") + node = create_pg("mike", start=False) + node.append_conf("wal_level = 'replica'") + node.start() + if not node.check_extension("injection_points"): + pytest.skip("Extension injection_points not installed") + node.safe_psql("CREATE EXTENSION injection_points") + node.safe_psql("select pg_create_physical_replication_slot('slot_physical', true)") + node.safe_psql( + "select pg_replication_slot_advance('slot_physical', pg_current_wal_lsn())" + ) + node.safe_psql("checkpoint") + node.advance_wal(20) + node.safe_psql( + "select pg_replication_slot_advance('slot_physical', pg_current_wal_lsn())" + ) + node.safe_psql("checkpoint") + node.advance_wal(20) + checkpoint = node.background_psql("postgres") + checkpoint.query_safe( + "select injection_points_attach('checkpoint-before-old-wal-removal','wait')" + ) + checkpoint.query_until( + r"starting_checkpoint", "\\echo starting_checkpoint\ncheckpoint;\n\\q\n" + ) + node.wait_for_event("checkpointer", "checkpoint-before-old-wal-removal") + node.safe_psql( + "select pg_replication_slot_advance('slot_physical', pg_current_wal_lsn())" + ) + log_offset = node.current_log_position() + node.safe_psql( + "select injection_points_wakeup('checkpoint-before-old-wal-removal')" + ) + node.wait_for_log(r"checkpoint complete", log_offset) + node.stop("immediate") + node.start() + restart_lsn = node.safe_psql( + "select restart_lsn from pg_replication_slots " + "where slot_name = 'slot_physical'" + ) + restart_lsn_segment = node.safe_psql( + "SELECT pg_walfile_name('{}'::pg_lsn)".format(restart_lsn) + ) + assert os.path.isfile( + "{}/pg_wal/{}".format(node.datadir, restart_lsn_segment) + ), "WAL segment {} for physical slot's restart_lsn {} 
exists".format( + restart_lsn_segment, restart_lsn + ) diff --git a/src/test/recovery/pyt/test_048_vacuum_horizon_floor.py b/src/test/recovery/pyt/test_048_vacuum_horizon_floor.py new file mode 100644 index 0000000000000..31c09f210f894 --- /dev/null +++ b/src/test/recovery/pyt/test_048_vacuum_horizon_floor.py @@ -0,0 +1,130 @@ +# Copyright (c) 2024-2026, PostgreSQL Global Development Group + +"""Port of src/test/recovery/t/048_vacuum_horizon_floor.pl. + +Reproduces the vacuum-horizon-floor scenario: a VACUUM on the primary blocks in +BufferCleanup while a cursor pins a heap page; meanwhile the standby's +hot_standby_feedback horizon is toggled (by detaching/reattaching the walreceiver +via primary_conninfo) so the computed vacuum horizon could regress. The pinned +tuple (value 7) must still be readable across the cursor's second FETCH, and the +VACUUM must complete without corrupting visibility. +""" + +import re + + +def test_048_vacuum_horizon_floor(create_pg): + """VACUUM under a moving standby horizon preserves pinned-tuple visibility.""" + primary = create_pg("primary", allows_streaming="physical", start=False) + primary.append_conf( + "\nhot_standby_feedback = on\nautovacuum = off\nlog_min_messages = INFO\n" + "maintenance_work_mem = 64\nio_combine_limit = 1\n" + ) + primary.start() + primary.backup("my_backup") + replica = create_pg( + "standby", + from_backup=(primary, "my_backup"), + has_streaming=True, + start=False, + ) + replica.start() + test_db = "test_db" + primary.safe_psql("CREATE DATABASE {}".format(test_db)) + orig_conninfo = primary.connstr() + table1 = "vac_horizon_floor_table" + psql_a = primary.background_psql(test_db, on_error_stop=True) + psql_b = primary.background_psql(test_db, on_error_stop=True) + nrows = 2000 + primary.safe_psql( + "CREATE TABLE {t}(col1 int)\n" + " WITH (autovacuum_enabled=false, fillfactor=10);\n" + "INSERT INTO {t} VALUES(7);\n" + "INSERT INTO {t} SELECT generate_series(1, {n}) % 3;\n" + "CREATE INDEX on 
{t}(col1);\n" + "DELETE FROM {t} WHERE col1 = 0;\n" + "INSERT INTO {t} VALUES(7);".format(t=table1, n=nrows), + dbname=test_db, + ) + primary_lsn = primary.lsn("flush") + primary.wait_for_catchup(replica, "replay", primary_lsn) + assert replica.poll_query_until( + "SELECT EXISTS (SELECT * FROM pg_stat_wal_receiver);", "t", dbname=test_db + ) + replica.safe_psql( + "ALTER SYSTEM SET primary_conninfo = '';\nSELECT pg_reload_conf();", + dbname=test_db, + ) + assert replica.poll_query_until( + "SELECT EXISTS (SELECT * FROM pg_stat_wal_receiver);", "f", dbname=test_db + ) + res = psql_a.query_safe( + "INSERT INTO {t} VALUES (99);\n" + "UPDATE {t} SET col1 = 100 WHERE col1 = 99;\n" + "SELECT 'after_update';".format(t=table1) + ) + assert re.search( + r"^after_update$", res, re.M + ), "UPDATE occurred on primary session A" + cursor1 = "vac_horizon_floor_cursor1" + res = psql_b.query_safe( + "BEGIN;\nSET enable_bitmapscan = off;\nSET enable_indexscan = off;\n" + "SET enable_indexonlyscan = off;\n" + "DECLARE {c} CURSOR FOR SELECT * FROM {t} WHERE col1 = 7;\n" + "FETCH {c};".format(c=cursor1, t=table1) + ) + assert res == "7", "Cursor query returned {}. 
Expected value 7.".format(res) + vacuum_pid = psql_a.query_safe("SELECT pg_backend_pid();") + psql_a.send( + "SET maintenance_io_concurrency = 0;\n" + "VACUUM (VERBOSE, FREEZE, PARALLEL 0) {t};\n" + "\\echo VACUUM\n".format(t=table1) + ) + assert primary.poll_query_until( + "SELECT count(*) >= 1 FROM pg_stat_activity\n" + " WHERE pid = {pid}\n" + " AND wait_event = 'BufferCleanup';".format(pid=vacuum_pid), + "t", + dbname=test_db, + ) + assert replica.poll_query_until( + "SELECT EXISTS (SELECT * FROM pg_stat_wal_receiver);", "f", dbname=test_db + ) + replica.safe_psql( + "ALTER SYSTEM SET primary_conninfo = '{}';\n" + "SELECT pg_reload_conf();".format(orig_conninfo), + dbname=test_db, + ) + assert replica.poll_query_until( + "SELECT EXISTS (SELECT * FROM pg_stat_wal_receiver);", "t", dbname=test_db + ) + assert primary.poll_query_until( + "SELECT EXISTS (SELECT * FROM pg_stat_replication);", "t", dbname=test_db + ) + res = psql_b.query_safe("FETCH {}".format(cursor1)) + assert ( + res == "7" + ), "Cursor query returned {} from second fetch. 
Expected value 7.".format(res) + assert primary.poll_query_until( + "SELECT index_vacuum_count > 0\n" + "FROM pg_stat_progress_vacuum\n" + "WHERE datname='{db}' AND relid::regclass = '{t}'::regclass;".format( + db=test_db, t=table1 + ), + "t", + dbname=test_db, + ) + psql_b.query_until(re.compile(r"^commit$", re.M), "COMMIT;\n\\echo commit\n") + assert primary.poll_query_until( + "SELECT vacuum_count > 0\n" + "FROM pg_stat_all_tables WHERE relname = '{t}';".format(t=table1), + "t", + dbname=test_db, + ) + primary_lsn = primary.lsn("flush") + primary.safe_psql("INSERT INTO {t} VALUES (1);".format(t=table1), dbname=test_db) + primary.wait_for_catchup(replica, "replay", primary_lsn) + psql_a.quit() + psql_b.quit() + replica.stop() + primary.stop() diff --git a/src/test/recovery/pyt/test_049_wait_for_lsn.py b/src/test/recovery/pyt/test_049_wait_for_lsn.py new file mode 100644 index 0000000000000..16a2794bf7658 --- /dev/null +++ b/src/test/recovery/pyt/test_049_wait_for_lsn.py @@ -0,0 +1,947 @@ +# Copyright (c) 2024-2026, PostgreSQL Global Development Group + +"""Port of src/test/recovery/t/049_wait_for_lsn.pl. + +Checks waiting for an LSN using the WAIT FOR command. Tests the standby wait +modes (standby_replay/standby_write/standby_flush) on a standby and primary_flush +mode on a primary: sessions block until replay/write/flush reaches the target +LSN and unblock exactly then, error on invalid targets or when called in a +transaction with a held snapshot / inside a function/procedure/DO block, time +out as specified, surface the right wait events in pg_stat_activity, behave +correctly across recovery pause/resume and promotion, and survive a cascade +upstream's timeline switch. +""" + + +def _stop_walreceiver(node): + """Stop node's walreceiver by clearing primary_conninfo. + + Returns the saved (quoted) primary_conninfo so resume_walreceiver() can + restore it, mirroring the Perl stop_walreceiver helper. 
Freezes the + walreceiver-tracked positions (writtenUpto, flushedUpto) for fencepost + tests. + """ + saved_primary_conninfo = node.safe_psql( + "SELECT pg_catalog.quote_literal(setting)\n" + "FROM pg_settings\n" + "WHERE name = 'primary_conninfo';" + ) + node.safe_psql("ALTER SYSTEM SET primary_conninfo = '';\nSELECT pg_reload_conf();") + assert node.poll_query_until( + "SELECT NOT EXISTS (SELECT * FROM pg_stat_wal_receiver);" + ) + return saved_primary_conninfo + + +def _resume_walreceiver(node, saved_primary_conninfo): + """Restart node's walreceiver by restoring the saved primary_conninfo. + + Must be paired with a prior _stop_walreceiver() call (mirrors the Perl + resume_walreceiver helper). + """ + node.safe_psql( + "ALTER SYSTEM SET primary_conninfo = {};\n" + "SELECT pg_reload_conf();".format(saved_primary_conninfo) + ) + assert node.poll_query_until("SELECT EXISTS (SELECT * FROM pg_stat_wal_receiver);") + + +def _check_wait_for_lsn_fencepost(node, mode, current_lsn, label): + """Verify the wait predicate target <= currentLSN at the boundary. + + Given current_lsn (the frozen position for mode), checks that target == + current and target == current - 1 succeed and target == current + 1 times + out. Returns (lsn_minus, lsn_plus). Mirrors the Perl helper. 
+ """ + lsn_minus = node.safe_psql("SELECT ('{}'::pg_lsn - 1)::text".format(current_lsn)) + lsn_plus = node.safe_psql("SELECT ('{}'::pg_lsn + 1)::text".format(current_lsn)) + + cases = [ + (current_lsn, "success", "target == current succeeds", "5s"), + (lsn_minus, "success", "target == current - 1 succeeds", "5s"), + (lsn_plus, "timeout", "target == current + 1 times out", "500ms"), + ] + for target_lsn, expected, desc, timeout in cases: + output = node.safe_psql( + "WAIT FOR LSN '{target}'\n" + "\tWITH (MODE '{mode}', timeout '{timeout}', no_throw);".format( + target=target_lsn, mode=mode, timeout=timeout + ) + ) + assert output == expected, "{}: {}".format(label, desc) + + return lsn_minus, lsn_plus + + +def _launch_wait(node, script): + """Start a background psql, run script up to its '\\echo start', return it. + + The script must echo 'start' before its blocking WAIT FOR, so query_until + returns while the WAIT FOR keeps running in the session (mirrors the Perl + background_psql + query_until(qr/start/) pattern). 
+ """ + session = node.background_psql("postgres") + session.query_until(r"start", script) + return session + + +def _expect_blocked(node, count, wait_event_pattern): + """Poll until exactly count backends are blocked on the wait event(s).""" + if wait_event_pattern.endswith("%"): + predicate = "wait_event LIKE '{}'".format(wait_event_pattern) + else: + predicate = "wait_event = '{}'".format(wait_event_pattern) + assert node.poll_query_until( + "SELECT count(*) = {} FROM pg_stat_activity WHERE {}".format(count, predicate) + ) + + +def test_049_wait_for_lsn(create_pg): + """WAIT FOR LSN across all modes, validation, pause/resume, and promotion.""" + primary = create_pg("primary", allows_streaming=True) + primary.safe_psql("CREATE TABLE wait_test AS SELECT generate_series(1,10) AS a") + backup_name = "my_backup" + primary.backup(backup_name) + + standby = create_pg( + "standby", from_backup=(primary, backup_name), has_streaming=True, start=False + ) + standby.append_conf("recovery_min_apply_delay = '1s'") + standby.start() + + lsn2, lsn3 = _basic_modes(primary, standby) + _timeout_and_subxact(primary, standby, lsn2, lsn3) + _mode_and_syntax_validation(primary, standby, lsn3) + _multi_replay_waiters(primary, standby) + _multi_write_waiters(primary, standby) + _multi_flush_waiters(primary, standby) + _mixed_mode_waiters(primary, standby) + _multi_primary_flush_waiters(primary) + _promotion_terminates_waits(primary, standby) + + _archive_only_standby(create_pg) + _fresh_shmem_walreceiver(create_pg) + _cascade_timeline_switch(create_pg) + + +def _basic_modes(primary, standby): + """Sections 1-3: basic WAIT FOR in each mode reaches the target LSN.""" + # 1. WAIT FOR works for replay. 
+ primary.safe_psql("INSERT INTO wait_test VALUES (generate_series(11, 20))") + lsn1 = primary.safe_psql("SELECT pg_current_wal_insert_lsn()") + output = standby.safe_psql( + "WAIT FOR LSN '{lsn}' WITH (timeout '1d');\n" + "SELECT pg_lsn_cmp(pg_last_wal_replay_lsn(), '{lsn}'::pg_lsn);".format(lsn=lsn1) + ) + assert ( + int(output.split("\n")[-1]) >= 0 + ), "standby reached the same LSN as primary after WAIT FOR" + + # 2. New data is visible after WAIT FOR. + primary.safe_psql("INSERT INTO wait_test VALUES (generate_series(21, 30))") + lsn2 = primary.safe_psql("SELECT pg_current_wal_insert_lsn()") + output = standby.safe_psql( + "WAIT FOR LSN '{}';\nSELECT count(*) FROM wait_test;".format(lsn2) + ) + assert output.split("\n")[-1] == "30", "standby reached the same LSN as primary" + + # 3. WAIT FOR with standby_write, standby_flush, and primary_flush modes. + primary.safe_psql("INSERT INTO wait_test VALUES (generate_series(31, 40))") + lsn_write = primary.safe_psql("SELECT pg_current_wal_insert_lsn()") + output = standby.safe_psql( + "WAIT FOR LSN '{lsn}' WITH (MODE 'standby_write', timeout '1d');\n" + "SELECT pg_lsn_cmp((SELECT written_lsn FROM pg_stat_wal_receiver), " + "'{lsn}'::pg_lsn);".format(lsn=lsn_write) + ) + assert int(output.split("\n")[-1]) >= 0, ( + "standby wrote WAL up to target LSN after WAIT FOR with MODE " "'standby_write'" + ) + + primary.safe_psql("INSERT INTO wait_test VALUES (generate_series(41, 50))") + lsn_flush = primary.safe_psql("SELECT pg_current_wal_insert_lsn()") + output = standby.safe_psql( + "WAIT FOR LSN '{lsn}' WITH (MODE 'standby_flush', timeout '1d');\n" + "SELECT pg_lsn_cmp(pg_last_wal_receive_lsn(), '{lsn}'::pg_lsn);".format( + lsn=lsn_flush + ) + ) + assert int(output.split("\n")[-1]) >= 0, ( + "standby flushed WAL up to target LSN after WAIT FOR with MODE " + "'standby_flush'" + ) + + primary.safe_psql("INSERT INTO wait_test VALUES (generate_series(51, 60))") + lsn_primary_flush = primary.safe_psql("SELECT 
pg_current_wal_insert_lsn()") + output = primary.safe_psql( + "WAIT FOR LSN '{lsn}' WITH (MODE 'primary_flush', timeout '1d');\n" + "SELECT pg_lsn_cmp(pg_current_wal_flush_lsn(), '{lsn}'::pg_lsn);".format( + lsn=lsn_primary_flush + ) + ) + assert ( + int(output.split("\n")[-1]) >= 0 + ), "primary flushed WAL up to target LSN after WAIT FOR with MODE 'primary_flush'" + + lsn3 = primary.safe_psql("SELECT pg_current_wal_insert_lsn() + 10000000000") + return lsn2, lsn3 + + +def _timeout_and_subxact(primary, standby, lsn2, lsn3): + """Section 4 + 4a: timeout statuses and subtransaction cleanup.""" + standby.safe_psql("WAIT FOR LSN '{}' WITH (timeout '10ms');".format(lsn2)) + res = standby.psql_capture( + "WAIT FOR LSN '{}' WITH (timeout '1000ms');".format(lsn3) + ) + assert ( + "timed out while waiting for target LSN" in res.stderr + ), "get timeout on waiting for unreachable LSN" + + output = standby.safe_psql( + "WAIT FOR LSN '{}' WITH (timeout '0.1s', no_throw);".format(lsn2) + ) + assert ( + output == "success" + ), "WAIT FOR returns correct status after successful waiting" + output = standby.safe_psql( + "WAIT FOR LSN '{}' WITH (timeout '10ms', no_throw);".format(lsn3) + ) + assert output == "timeout", "WAIT FOR returns correct status after timeout" + + _subxact_cleanup(primary) + + +def _subxact_cleanup(primary): + """Section 4a: aborting a subtransaction during WAIT FOR cleans up state.""" + subxact_lsn = primary.safe_psql("SELECT pg_current_wal_insert_lsn() + 10000000000") + appname = "wait_for_lsn_subxact_cleanup" + session = primary.background_psql("postgres", on_error_stop=False) + session.query_until( + r"start", + "SET application_name = '{appname}';\n" + "BEGIN;\n" + "SAVEPOINT wait_cleanup;\n" + "\\echo start\n" + "WAIT FOR LSN '{lsn}' WITH (MODE 'primary_flush');\n" + "ROLLBACK TO wait_cleanup;\n" + "WAIT FOR LSN '{lsn}'\n" + "\tWITH (MODE 'primary_flush', timeout '10ms', no_throw);\n" + "COMMIT;\n".format(appname=appname, lsn=subxact_lsn), + ) + assert 
primary.poll_query_until( + "SELECT count(*) = 1 FROM pg_stat_activity\n" + "WHERE application_name = '{}'\n" + " AND wait_event = 'WaitForWalFlush'".format(appname) + ), "WAIT FOR LSN did not enter the primary_flush wait path" + subxact_cancelled = primary.safe_psql( + "SELECT pg_cancel_backend(pid) FROM pg_stat_activity\n" + "WHERE application_name = '{}'\n" + " AND wait_event = 'WaitForWalFlush'".format(appname) + ) + assert subxact_cancelled == "t", "canceled WAIT FOR LSN in subtransaction" + session.quit() + stdout = session.stdout.rstrip("\n") + assert ( + "canceling statement due to user request" in session.stderr + ), "query cancel interrupted WAIT FOR LSN in subtransaction" + assert stdout == "timeout", "second WAIT FOR LSN timed out after savepoint rollback" + assert ( + "server closed the connection unexpectedly" not in session.stderr + ), "WAIT FOR LSN after savepoint rollback did not disconnect" + + +def _mode_and_syntax_validation(primary, standby, lsn3): + """Section 5 + 6: mode validation and parameter/syntax error cases.""" + res = primary.psql_capture( + "WAIT FOR LSN '{}' WITH (MODE 'standby_flush');".format(lsn3) + ) + assert ( + "recovery is not in progress" in res.stderr + ), "get an error when running standby_flush on the primary" + + res = standby.psql_capture( + "WAIT FOR LSN '{}' WITH (MODE 'primary_flush');".format(lsn3) + ) + assert ( + "recovery is in progress" in res.stderr + ), "get an error when running primary_flush on the standby" + + res = standby.psql_capture( + "BEGIN ISOLATION LEVEL REPEATABLE READ; SELECT 1; " + "WAIT FOR LSN '{}';".format(lsn3) + ) + assert ( + "WAIT FOR must be called without an active or registered snapshot" in res.stderr + ), ( + "get an error when running in a transaction with an isolation level " + "higher than REPEATABLE READ" + ) + + _wrap_validation(primary, standby, lsn3) + _param_validation(primary, standby) + + +def _wrap_validation(primary, standby, lsn3): + """Section 5 (cont.): WAIT FOR errors 
inside function/procedure/DO block.""" + primary.safe_psql( + "CREATE FUNCTION pg_wal_replay_wait_wrap(target_lsn pg_lsn) " + "RETURNS void AS $$\n" + " BEGIN\n" + " EXECUTE format('WAIT FOR LSN %L;', target_lsn);\n" + " END\n" + "$$\n" + "LANGUAGE plpgsql;\n" + "\n" + "CREATE PROCEDURE pg_wal_replay_wait_proc(target_lsn pg_lsn) AS $$\n" + " BEGIN\n" + " EXECUTE format('WAIT FOR LSN %L;', target_lsn);\n" + " END\n" + "$$\n" + "LANGUAGE plpgsql;\n" + ) + primary.wait_for_catchup(standby) + + res = standby.psql_capture("SELECT pg_wal_replay_wait_wrap('{}');".format(lsn3)) + assert ( + "WAIT FOR can only be executed as a top-level statement" in res.stderr + ), "get an error when running within a function" + + res = standby.psql_capture("CALL pg_wal_replay_wait_proc('{}');".format(lsn3)) + assert ( + "WAIT FOR can only be executed as a top-level statement" in res.stderr + ), "get an error when running within a procedure" + + res = standby.psql_capture( + "DO $$ BEGIN EXECUTE format('WAIT FOR LSN %L;', '{}'); END $$;".format(lsn3) + ) + assert ( + "WAIT FOR can only be executed as a top-level statement" in res.stderr + ), "get an error when running within a DO block" + + +def _param_validation(primary, standby): + """Section 6: parameter and syntax validation error cases on standby.""" + test_lsn = primary.safe_psql("SELECT pg_current_wal_insert_lsn()") + lsn2 = test_lsn + lsn3 = primary.safe_psql("SELECT pg_current_wal_insert_lsn() + 10000000000") + + checks = [ + ( + "WAIT FOR LSN '{}' WITH (timeout '-1000ms');".format(test_lsn), + "timeout cannot be negative", + "get error for negative timeout", + ), + ( + "WAIT FOR LSN '{}' WITH (unknown_param 'value');".format(test_lsn), + 'option "unknown_param" not recognized', + "get error for unknown parameter", + ), + ( + "WAIT FOR LSN '{}' WITH (timeout '1000', timeout '2000');".format(test_lsn), + "conflicting or redundant options", + "get error for duplicate TIMEOUT parameter", + ), + ( + "WAIT FOR LSN '{}' WITH (no_throw, 
no_throw);".format(test_lsn), + "conflicting or redundant options", + "get error for duplicate NO_THROW parameter", + ), + ( + "WAIT FOR LSN '{}' (timeout '100ms');".format(test_lsn), + "syntax error", + "get syntax error when options specified without WITH keyword", + ), + ( + "WAIT FOR TIMEOUT 1000;", + "syntax error", + "get syntax error for missing LSN", + ), + ( + "WAIT FOR LSN 'invalid_lsn';", + "invalid input syntax for type pg_lsn", + "get error for invalid LSN format", + ), + ( + "WAIT FOR LSN '{}' WITH (timeout 'invalid');".format(test_lsn), + "invalid timeout value", + "get error for invalid timeout format", + ), + ( + "WAIT FOR LSN '{}' WITH (invalid_option 'value');".format(test_lsn), + 'option "invalid_option" not recognized', + "get error for invalid WITH clause option", + ), + ( + "WAIT FOR LSN '{}' WITH (MODE 'invalid');".format(test_lsn), + 'unrecognized value for WAIT option "mode": "invalid"', + "get error for invalid MODE value", + ), + ( + "WAIT FOR LSN '{}' WITH (MODE 'standby_replay', " + "MODE 'standby_write');".format(test_lsn), + "conflicting or redundant options", + "get error for duplicate MODE parameter", + ), + ] + for sql, pattern, msg in checks: + res = standby.psql_capture(sql) + assert pattern in res.stderr, "{}: stderr was {!r}".format(msg, res.stderr) + + output = standby.safe_psql( + "WAIT FOR LSN '{}' WITH (timeout '0.1s', no_throw);".format(lsn2) + ) + assert output == "success", "WAIT FOR WITH clause syntax works correctly" + output = standby.safe_psql( + "WAIT FOR LSN '{}' WITH (timeout 100, no_throw);".format(lsn3) + ) + assert output == "timeout", "WAIT FOR WITH clause returns correct timeout status" + + +def _create_logging_functions(primary): + """Create the log_count / log_wait_done helper functions on the primary.""" + primary.safe_psql( + "CREATE FUNCTION log_count(i int) RETURNS void AS $$\n" + " DECLARE\n" + " count int;\n" + " BEGIN\n" + " SELECT count(*) FROM wait_test INTO count;\n" + " IF count >= 31 + i 
THEN\n" + " RAISE LOG 'count %', i;\n" + " END IF;\n" + " END\n" + "$$\n" + "LANGUAGE plpgsql;\n" + "\n" + "CREATE FUNCTION log_wait_done(prefix text, i int) RETURNS void AS $$\n" + " BEGIN\n" + " RAISE LOG '% %', prefix, i;\n" + " END\n" + "$$\n" + "LANGUAGE plpgsql;\n" + ) + + +def _multi_replay_waiters(primary, standby): + """Section 7a: multiple standby_replay waiters report consistent data.""" + _create_logging_functions(primary) + standby.safe_psql("SELECT pg_wal_replay_pause();") + + sessions = [] + for i in range(5): + primary.safe_psql("INSERT INTO wait_test VALUES ({});".format(i)) + lsn = primary.safe_psql("SELECT pg_current_wal_insert_lsn()") + sessions.append( + _launch_wait( + standby, + "\\echo start\n" + "WAIT FOR LSN '{lsn}';\n" + "SELECT log_count({i});\n".format(lsn=lsn, i=i), + ) + ) + + log_offset = standby.current_log_position() + standby.safe_psql("SELECT pg_wal_replay_resume();") + for i in range(5): + standby.wait_for_log("count {}".format(i), log_offset) + sessions[i].quit() + + +def _multi_write_waiters(primary, standby): + """Section 7b: multiple standby_write waiters unblock when WAL is written.""" + saved = _stop_walreceiver(standby) + + write_lsns = [] + for i in range(5): + primary.safe_psql("INSERT INTO wait_test VALUES (100 + {});".format(i)) + write_lsns.append(primary.safe_psql("SELECT pg_current_wal_insert_lsn()")) + + sessions = [] + for i in range(5): + sessions.append( + _launch_wait( + standby, + "\\echo start\n" + "WAIT FOR LSN '{lsn}' WITH (MODE 'standby_write', timeout '1d');\n" + "SELECT log_wait_done('write_done', {i});\n".format( + lsn=write_lsns[i], i=i + ), + ) + ) + + _expect_blocked(standby, 5, "WaitForWalWrite") + + write_log_offset = standby.current_log_position() + _resume_walreceiver(standby, saved) + + for i in range(5): + standby.wait_for_log("write_done {}".format(i), write_log_offset) + sessions[i].quit() + + output = standby.safe_psql( + "SELECT pg_lsn_cmp((SELECT written_lsn FROM pg_stat_wal_receiver), " 
+ "'{}'::pg_lsn);".format(write_lsns[4]) + ) + assert ( + int(output) >= 0 + ), "multiple standby_write waiters: standby wrote WAL up to target LSN" + + +def _multi_flush_waiters(primary, standby): + """Section 7c: multiple standby_flush waiters unblock when WAL is flushed.""" + saved = _stop_walreceiver(standby) + + flush_lsns = [] + for i in range(5): + primary.safe_psql("INSERT INTO wait_test VALUES (200 + {});".format(i)) + flush_lsns.append(primary.safe_psql("SELECT pg_current_wal_insert_lsn()")) + + sessions = [] + for i in range(5): + sessions.append( + _launch_wait( + standby, + "\\echo start\n" + "WAIT FOR LSN '{lsn}' WITH (MODE 'standby_flush', timeout '1d');\n" + "SELECT log_wait_done('flush_done', {i});\n".format( + lsn=flush_lsns[i], i=i + ), + ) + ) + + _expect_blocked(standby, 5, "WaitForWalFlush") + + flush_log_offset = standby.current_log_position() + _resume_walreceiver(standby, saved) + + for i in range(5): + standby.wait_for_log("flush_done {}".format(i), flush_log_offset) + sessions[i].quit() + + output = standby.safe_psql( + "SELECT pg_lsn_cmp(pg_last_wal_receive_lsn(), '{}'::pg_lsn);".format( + flush_lsns[4] + ) + ) + assert ( + int(output) >= 0 + ), "multiple standby_flush waiters: standby flushed WAL up to target LSN" + + +def _mixed_mode_waiters(primary, standby): + """Section 7d: mixed standby-mode waiters unblock on resume + reconnect.""" + saved = _stop_walreceiver(standby) + standby.safe_psql("SELECT pg_wal_replay_pause();") + + primary.safe_psql("INSERT INTO wait_test VALUES (generate_series(301, 310));") + target_lsn = primary.safe_psql("SELECT pg_current_wal_insert_lsn()") + + modes = ["standby_replay", "standby_write", "standby_flush"] + sessions = [] + for i in range(6): + sessions.append( + _launch_wait( + standby, + "\\echo start\n" + "WAIT FOR LSN '{lsn}' WITH (MODE '{mode}', timeout '1d');\n" + "SELECT log_wait_done('mixed_done', {i});\n".format( + lsn=target_lsn, mode=modes[i % 3], i=i + ), + ) + ) + + 
_expect_blocked(standby, 6, "WaitForWal%") + + mixed_log_offset = standby.current_log_position() + standby.safe_psql("SELECT pg_wal_replay_resume();") + assert standby.poll_query_until("SELECT NOT pg_is_wal_replay_paused();") + + _resume_walreceiver(standby, saved) + + for i in range(6): + standby.wait_for_log("mixed_done {}".format(i), mixed_log_offset) + sessions[i].quit() + + output = standby.safe_psql( + "SELECT pg_lsn_cmp((SELECT written_lsn FROM pg_stat_wal_receiver), " + "'{lsn}'::pg_lsn) >= 0 AND\n" + " pg_lsn_cmp(pg_last_wal_receive_lsn(), '{lsn}'::pg_lsn) >= 0 AND\n" + " pg_lsn_cmp(pg_last_wal_replay_lsn(), '{lsn}'::pg_lsn) >= 0;".format( + lsn=target_lsn + ) + ) + assert ( + output == "t" + ), "mixed mode waiters: all modes completed and reached target LSN" + + +def _multi_primary_flush_waiters(primary): + """Section 7e: multiple primary_flush waiters on the primary complete.""" + primary_flush_lsns = [] + for i in range(5): + primary.safe_psql("INSERT INTO wait_test VALUES (400 + {});".format(i)) + primary_flush_lsns.append( + primary.safe_psql("SELECT pg_current_wal_insert_lsn()") + ) + + log_offset = primary.current_log_position() + + sessions = [] + for i in range(5): + sessions.append( + _launch_wait( + primary, + "\\echo start\n" + "WAIT FOR LSN '{lsn}' WITH (MODE 'primary_flush', timeout '1d');\n" + "SELECT log_wait_done('primary_flush_done', {i});\n".format( + lsn=primary_flush_lsns[i], i=i + ), + ) + ) + + for i in range(5): + primary.wait_for_log("primary_flush_done {}".format(i), log_offset) + sessions[i].quit() + + output = primary.safe_psql( + "SELECT pg_lsn_cmp(pg_current_wal_flush_lsn(), '{}'::pg_lsn);".format( + primary_flush_lsns[4] + ) + ) + assert ( + int(output) >= 0 + ), "multiple primary_flush waiters: primary flushed WAL up to target LSN" + + +def _promotion_terminates_waits(primary, standby): + """Section 8: standby promotion terminates all standby wait modes.""" + lsn4 = primary.safe_psql("SELECT pg_current_wal_insert_lsn() + 
10000000000") + lsn5 = primary.safe_psql("SELECT pg_current_wal_insert_lsn()") + + modes = ["standby_replay", "standby_write", "standby_flush"] + sessions = [] + for i in range(3): + sessions.append( + _launch_wait( + standby, + "\\echo start\n" + "WAIT FOR LSN '{lsn}' WITH (MODE '{mode}');\n".format( + lsn=lsn4, mode=modes[i] + ), + ) + ) + + primary.safe_psql("SELECT pg_switch_wal();") + primary.wait_for_catchup(standby) + + log_offset = standby.current_log_position() + standby.promote() + + standby.wait_for_log(r"Recovery ended before target LSN.*was written", log_offset) + standby.wait_for_log(r"Recovery ended before target LSN.*was flushed", log_offset) + standby.wait_for_log(r"Recovery ended before target LSN.*was replayed", log_offset) + + standby.safe_psql("WAIT FOR LSN '{}';".format(lsn5)) + + output = standby.safe_psql( + "WAIT FOR LSN '{}' WITH (timeout '10ms', no_throw);".format(lsn4) + ) + assert ( + output == "not in recovery" + ), "WAIT FOR returns correct status after standby promotion" + + standby.stop() + primary.stop() + + for session in sessions: + session.quit() + + +def _archive_only_standby(create_pg): + """Section 9: standby_write/standby_flush on an archive-only standby.""" + arc_primary = create_pg("arc_primary", has_archiving=True, allows_streaming=True) + arc_primary.safe_psql("CREATE TABLE arc_test AS SELECT generate_series(1,10) AS a") + arc_backup_name = "arc_backup" + arc_primary.backup(arc_backup_name) + + arc_primary.safe_psql("INSERT INTO arc_test VALUES (generate_series(11, 20))") + arc_target_lsn = arc_primary.safe_psql("SELECT pg_current_wal_insert_lsn()") + + arc_segment = arc_primary.safe_psql("SELECT pg_walfile_name(pg_current_wal_lsn())") + arc_primary.safe_psql("SELECT pg_switch_wal()") + assert arc_primary.poll_query_until( + "SELECT last_archived_wal >= '{}' FROM pg_stat_archiver".format(arc_segment) + ), "Timed out waiting for WAL archiving on arc_primary" + + arc_standby = create_pg( + "arc_standby", + 
from_backup=(arc_primary, arc_backup_name), + has_restoring=True, + ) + + assert arc_standby.poll_query_until( + "SELECT pg_wal_lsn_diff(pg_last_wal_replay_lsn(), '{}') >= 0".format( + arc_target_lsn + ) + ), "Timed out waiting for archive replay on arc_standby" + + output = arc_standby.safe_psql("SELECT count(*) FROM pg_stat_wal_receiver") + assert output == "0", "arc_standby has no walreceiver" + + # 9a. Getter fallback: succeed immediately when already replayed. + for mode in ("standby_write", "standby_flush"): + output = arc_standby.safe_psql( + "WAIT FOR LSN '{lsn}'\n" + "\tWITH (MODE '{mode}', timeout '3s', no_throw);".format( + lsn=arc_target_lsn, mode=mode + ) + ) + assert ( + output == "success" + ), "{} succeeds on archive-only standby (getter fallback)".format(mode) + + _archive_replay_waker(arc_primary, arc_standby) + + arc_standby.stop() + arc_primary.stop() + + +def _archive_replay_waker(arc_primary, arc_standby): + """Section 9b: sleeping standby_write/flush waiters woken by replay.""" + arc_standby.safe_psql("SELECT pg_wal_replay_pause()") + + arc_primary.safe_psql("INSERT INTO arc_test VALUES (generate_series(21, 30))") + arc_target_lsn2 = arc_primary.safe_psql("SELECT pg_current_wal_insert_lsn()") + + arc_segment2 = arc_primary.safe_psql("SELECT pg_walfile_name(pg_current_wal_lsn())") + arc_primary.safe_psql("SELECT pg_switch_wal()") + assert arc_primary.poll_query_until( + "SELECT last_archived_wal >= '{}' FROM pg_stat_archiver".format(arc_segment2) + ), "Timed out waiting for WAL archiving on arc_primary (round 2)" + + write_session = _launch_wait( + arc_standby, + "\\echo start\n" + "WAIT FOR LSN '{lsn}'\n" + "\tWITH (MODE 'standby_write', timeout '1d', no_throw);\n".format( + lsn=arc_target_lsn2 + ), + ) + flush_session = _launch_wait( + arc_standby, + "\\echo start\n" + "WAIT FOR LSN '{lsn}'\n" + "\tWITH (MODE 'standby_flush', timeout '1d', no_throw);\n".format( + lsn=arc_target_lsn2 + ), + ) + + _expect_blocked(arc_standby, 2, "WaitForWal%") 
+ + arc_standby.safe_psql("SELECT pg_wal_replay_resume()") + + write_session.quit() + flush_session.quit() + assert ( + write_session.stdout.rstrip("\n") == "success" + ), "standby_write waiter woken by replay on archive-only standby" + assert ( + flush_session.stdout.rstrip("\n") == "success" + ), "standby_flush waiter woken by replay on archive-only standby" + + +def _fresh_shmem_walreceiver(create_pg): + """Sections 10-11: fresh-shmem walreceiver startup and fencepost checks.""" + rcv_primary = create_pg("rcv_primary", allows_streaming=True, start=False) + rcv_primary.append_conf("autovacuum = off") + rcv_primary.start() + rcv_primary.safe_psql("CREATE TABLE rcv_test AS SELECT generate_series(1,10) AS a") + + rcv_backup = "rcv_backup" + rcv_primary.backup(rcv_backup) + + rcv_standby = create_pg( + "rcv_standby", from_backup=(rcv_primary, rcv_backup), has_streaming=True + ) + + rcv_primary.safe_psql("INSERT INTO rcv_test VALUES (generate_series(11, 100))") + rcv_primary.safe_psql("SELECT pg_switch_wal()") + rcv_primary.safe_psql("INSERT INTO rcv_test VALUES (generate_series(101, 110))") + rcv_primary.wait_for_catchup(rcv_standby) + + rcv_standby.stop() + rcv_primary.stop() + rcv_standby.start() + + assert rcv_standby.poll_query_until( + "SELECT pg_last_wal_receive_lsn() IS NOT NULL;" + ), "walreceiver initial value did not become visible" + + rcv_standby.safe_psql("SELECT pg_wal_replay_pause()") + assert rcv_standby.poll_query_until( + "SELECT pg_get_wal_replay_pause_state() = 'paused'" + ), "Timed out waiting for rcv_standby replay to pause" + + _fresh_shmem_checks(rcv_standby) + + rcv_standby.safe_psql("SELECT pg_wal_replay_resume()") + rcv_primary.start() + rcv_primary.safe_psql("INSERT INTO rcv_test VALUES (generate_series(111, 120))") + rcv_primary.wait_for_catchup(rcv_standby) + + _fencepost_checks(rcv_primary, rcv_standby) + + rcv_standby.stop() + rcv_primary.stop() + + +def _fresh_shmem_checks(rcv_standby): + """Section 10: verify seeded walreceiver flush 
position and replay floor.""" + rcv_receive = rcv_standby.safe_psql("SELECT pg_last_wal_receive_lsn()") + rcv_replay = rcv_standby.safe_psql("SELECT pg_last_wal_replay_lsn()") + rcv_gap = rcv_standby.safe_psql( + "SELECT pg_wal_lsn_diff('{replay}'::pg_lsn, '{recv}'::pg_lsn) > 0".format( + replay=rcv_replay, recv=rcv_receive + ) + ) + assert rcv_gap == "t", "replay sits ahead of initial walreceiver flush position" + + rcv_receive_offset = rcv_standby.safe_psql( + "SELECT mod(pg_wal_lsn_diff('{recv}'::pg_lsn, '0/0'::pg_lsn),\n" + " setting::numeric)::int\n" + " FROM pg_settings\n" + " WHERE name = 'wal_segment_size'".format(recv=rcv_receive) + ) + assert ( + rcv_receive_offset == "0" + ), "initial walreceiver flush position is segment-aligned" + + for rcv_mode in ("standby_write", "standby_flush"): + output = rcv_standby.safe_psql( + "WAIT FOR LSN '{lsn}'\n" + "\tWITH (MODE '{mode}', timeout '5s', no_throw);".format( + lsn=rcv_replay, mode=rcv_mode + ) + ) + assert ( + output == "success" + ), "{} succeeds for already-replayed LSN after standby restart".format(rcv_mode) + + +def _fencepost_checks(rcv_primary, rcv_standby): + """Section 11: off-by-one boundary checks for the wait predicate.""" + saved = _stop_walreceiver(rcv_standby) + rcv_standby.safe_psql("SELECT pg_wal_replay_pause()") + assert rcv_standby.poll_query_until( + "SELECT pg_get_wal_replay_pause_state() = 'paused'" + ), "Timed out waiting for rcv_standby replay to pause" + + # 11a. standby_replay exact fencepost. + replay_lsn = rcv_standby.safe_psql("SELECT pg_last_wal_replay_lsn()") + _, replay_lsn_plus = _check_wait_for_lsn_fencepost( + rcv_standby, "standby_replay", replay_lsn, "standby_replay" + ) + + # 11b. standby_flush exact fencepost. 
+ flush_lsn = rcv_standby.safe_psql("SELECT pg_last_wal_receive_lsn()") + flush_covers_replay = rcv_standby.safe_psql( + "SELECT pg_wal_lsn_diff('{flush}'::pg_lsn, '{replay}'::pg_lsn) >= 0".format( + flush=flush_lsn, replay=replay_lsn + ) + ) + assert ( + flush_covers_replay == "t" + ), "standby_flush boundary is not masked by replay floor" + + _check_wait_for_lsn_fencepost( + rcv_standby, "standby_flush", flush_lsn, "standby_flush" + ) + + # 11c. A sleeping waiter at current + 1 wakes once replay advances past it. + rcv_primary.safe_psql("INSERT INTO rcv_test VALUES (generate_series(200, 210))") + + boundary_session = _launch_wait( + rcv_standby, + "\\echo start\n" + "WAIT FOR LSN '{lsn}'\n" + "\tWITH (MODE 'standby_replay', timeout '1d', no_throw);\n".format( + lsn=replay_lsn_plus + ), + ) + assert rcv_standby.poll_query_until( + "SELECT count(*) > 0 FROM pg_stat_activity " + "WHERE wait_event = 'WaitForWalReplay'" + ), "Boundary waiter did not sleep" + + rcv_standby.safe_psql("SELECT pg_wal_replay_resume()") + _resume_walreceiver(rcv_standby, saved) + boundary_session.quit() + assert ( + boundary_session.stdout.rstrip("\n") == "success" + ), "standby_replay: waiter at current + 1 wakes when replay advances" + + +def _cascade_timeline_switch(create_pg): + """Section 12: a WAIT FOR waiter survives a cascade upstream's promotion.""" + tl_primary = create_pg("tl_primary", allows_streaming=True, start=False) + tl_primary.append_conf("autovacuum = off") + tl_primary.start() + tl_primary.safe_psql("CREATE TABLE tl_test AS SELECT generate_series(1, 10) AS a") + + tl_backup = "tl_backup" + tl_primary.backup(tl_backup) + + tl_standby1 = create_pg( + "tl_standby1", from_backup=(tl_primary, tl_backup), has_streaming=True + ) + + tl_backup2 = "tl_backup2" + tl_standby1.backup(tl_backup2) + + tl_standby2 = create_pg( + "tl_standby2", from_backup=(tl_standby1, tl_backup2), has_streaming=True + ) + + tl_primary.safe_psql("INSERT INTO tl_test VALUES (generate_series(11, 20))") + 
tl_primary.wait_for_catchup(tl_standby1) + tl_standby1.wait_for_catchup(tl_standby2) + + tl_target = tl_primary.safe_psql( + "SELECT (pg_current_wal_insert_lsn() + 65536)::text" + ) + + tl_standby2.safe_psql("SELECT pg_wal_replay_pause()") + assert tl_standby2.poll_query_until( + "SELECT pg_get_wal_replay_pause_state() = 'paused'" + ), "Timed out waiting for tl_standby2 replay to pause" + + tl_session = _launch_wait( + tl_standby2, + "\\echo start\n" + "WAIT FOR LSN '{lsn}'\n" + "\tWITH (MODE 'standby_replay', timeout '1d', no_throw);\n".format( + lsn=tl_target + ), + ) + assert tl_standby2.poll_query_until( + "SELECT count(*) > 0 FROM pg_stat_activity " + "WHERE wait_event = 'WaitForWalReplay'" + ), "Cascade waiter did not sleep before promotion" + + tl_standby1.promote() + tl_standby1.safe_psql("INSERT INTO tl_test VALUES (generate_series(21, 1020))") + tl_standby1.safe_psql("SELECT pg_switch_wal()") + + tl_standby2.safe_psql("SELECT pg_wal_replay_resume()") + + assert tl_standby2.poll_query_until( + "SELECT received_tli > 1 FROM pg_stat_wal_receiver" + ), "tl_standby2 did not follow upstream timeline switch" + + tl_session.quit() + assert tl_session.stdout.rstrip("\n") == "success", ( + "WAIT FOR LSN survives upstream promotion and timeline switch on " + "cascade standby" + ) + + tl_standby2.stop() + tl_standby1.stop() + tl_primary.stop() diff --git a/src/test/recovery/pyt/test_050_redo_segment_missing.py b/src/test/recovery/pyt/test_050_redo_segment_missing.py new file mode 100644 index 0000000000000..cba9a205fa984 --- /dev/null +++ b/src/test/recovery/pyt/test_050_redo_segment_missing.py @@ -0,0 +1,66 @@ +# Copyright (c) 2024-2026, PostgreSQL Global Development Group + +"""Port of src/test/recovery/t/050_redo_segment_missing.pl. + +When the WAL segment holding a checkpoint's redo location is missing at startup, +the server must FATAL rather than start. 
Injection points pause checkpointing so +a WAL switch lands the redo and checkpoint records in different segments; the +redo segment is then deleted before a restart. Requires injection points. +""" + +import os +import re + +import pytest + +import pypg + + +def test_050_redo_segment_missing(create_pg, pg_bin): + """A missing redo WAL segment makes startup FATAL, not succeed.""" + if os.environ.get("enable_injection_points") != "yes": + pytest.skip("Injection points not supported by this build") + node = create_pg("testnode", start=False) + node.append_conf("log_checkpoints = on") + node.start() + if not node.check_extension("injection_points"): + pytest.skip("Extension injection_points not installed") + node.safe_psql("CREATE EXTENSION injection_points") + node.safe_psql( + "select injection_points_attach('create-checkpoint-initial', 'wait')" + ) + node.safe_psql("select injection_points_attach('create-checkpoint-run', 'wait')") + checkpoint = node.background_psql("postgres") + checkpoint.query_until( + r"starting_checkpoint", "\\echo starting_checkpoint\ncheckpoint;\n" + ) + node.wait_for_event("checkpointer", "create-checkpoint-initial") + node.safe_psql("select injection_points_wakeup('create-checkpoint-initial')") + node.wait_for_event("checkpointer", "create-checkpoint-run") + node.safe_psql("SELECT pg_switch_wal()") + log_offset = node.current_log_position() + node.safe_psql("select injection_points_wakeup('create-checkpoint-run')") + node.wait_for_log(r"checkpoint complete", log_offset) + checkpoint.quit() + redo_lsn = node.safe_psql("SELECT redo_lsn FROM pg_control_checkpoint()") + redo_walfile_name = node.safe_psql("SELECT pg_walfile_name('{}')".format(redo_lsn)) + checkpoint_lsn = node.safe_psql( + "SELECT checkpoint_lsn FROM pg_control_checkpoint()" + ) + checkpoint_walfile_name = node.safe_psql( + "SELECT pg_walfile_name('{}')".format(checkpoint_lsn) + ) + assert ( + redo_walfile_name != checkpoint_walfile_name + ), "redo and checkpoint records on 
different segments" + os.unlink("{}/pg_wal/{}".format(node.datadir, redo_walfile_name)) + node.stop("immediate") + pg_bin.run_command( + ["pg_ctl", "--pgdata", str(node.datadir), "--log", str(node.log), "start"] + ) + logfile = pypg.slurp_file(node.log) + assert re.search( + r"FATAL: .* could not find redo location .* referenced by checkpoint " + r"record at .*", + logfile, + ), "ends with FATAL because it could not find redo location" diff --git a/src/test/recovery/pyt/test_051_effective_wal_level.py b/src/test/recovery/pyt/test_051_effective_wal_level.py new file mode 100644 index 0000000000000..96b747159fdcf --- /dev/null +++ b/src/test/recovery/pyt/test_051_effective_wal_level.py @@ -0,0 +1,220 @@ +# Copyright (c) 2025-2026, PostgreSQL Global Development Group + +"""Port of src/test/recovery/t/051_effective_wal_level.pl. + +Exercises the effective_wal_level machinery: with wal_level='replica', creating +a logical slot raises effective_wal_level to 'logical' (and dropping the last +one lowers it back). Covers persistence across restart, refusal to start at +wal_level='minimal' with a live logical slot, slot invalidation via +max_slot_wal_keep_size, and propagation of effective_wal_level across +standby/cascade promotions. 
+""" + +import re + + +def _test_wal_level(node, expected, msg): + assert ( + node.safe_psql( + "select current_setting('wal_level'), " + "current_setting('effective_wal_level');" + ) + == expected + ), msg + + +def _wait_logical_decoding_disabled(node): + assert node.poll_query_until( + "select current_setting('effective_wal_level') = 'replica';" + ) + + +def test_051_effective_wal_level(create_pg, pg_bin): + """effective_wal_level tracks logical slots and propagates across promotion.""" + primary = create_pg("primary", allows_streaming=True, start=False) + primary.append_conf("log_min_messages = debug1") + primary.start() + _test_wal_level( + primary, + "replica|replica", + "wal_level and effective_wal_level start at 'replica'", + ) + primary.safe_psql( + "select pg_create_physical_replication_slot('test_phy_slot', false, false)" + ) + _test_wal_level( + primary, + "replica|replica", + "effective_wal_level unchanged with a new physical slot", + ) + primary.safe_psql("select pg_drop_replication_slot('test_phy_slot')") + primary.safe_psql( + "select pg_create_logical_replication_slot('test_tmp_slot', " + "'test_decoding', true)" + ) + assert primary.log_contains( + "logical decoding is enabled upon creating a new logical replication slot" + ), "logical decoding enabled upon creating a temp slot" + _wait_logical_decoding_disabled(primary) + primary.safe_psql("create table foo(a int primary key)") + primary.safe_psql("repack (concurrently) foo;") + assert primary.log_contains( + "logical decoding is enabled upon creating a new logical replication slot" + ), "logical decoding enabled by repack" + _wait_logical_decoding_disabled(primary) + _test_wal_level( + primary, "replica|replica", "logical decoding disabled after repack" + ) + primary.safe_psql( + "select pg_create_logical_replication_slot('test_slot', 'pgoutput')" + ) + _test_wal_level( + primary, + "replica|logical", + "effective_wal_level increased to 'logical' on a logical slot", + ) + primary.restart() + 
_test_wal_level( + primary, + "replica|logical", + "effective_wal_level remains 'logical' after restart", + ) + primary.safe_psql( + "select pg_create_logical_replication_slot('test_slot2', 'pgoutput')" + ) + primary.safe_psql("select pg_drop_replication_slot('test_slot2')") + _test_wal_level( + primary, + "replica|logical", + "effective_wal_level stays 'logical' as one slot remains", + ) + _minimal_refusal(primary, pg_bin) + _invalidation_and_propagation(primary, create_pg) + + +def _minimal_refusal(primary, pg_bin): + import pypg # pylint: disable=import-outside-toplevel + + primary.adjust_conf("wal_level", "minimal") + primary.adjust_conf("max_wal_senders", "0") + primary.stop() + pg_bin.command_fails( + [ + "pg_ctl", + "--pgdata", + str(primary.datadir), + "--log", + str(primary.log), + "start", + ], + "cannot start with wal_level='minimal' and an in-use logical slot", + ) + logfile = pypg.slurp_file(primary.log) + assert re.search( + r'logical replication slot "test_slot" exists, but "wal_level" < "replica"', + logfile, + ), "logical slots require logical decoding enabled at startup" + primary.adjust_conf("wal_level", "replica") + primary.adjust_conf("max_wal_senders", "10") + primary.append_conf( + "\nmin_wal_size = 32MB\nmax_wal_size = 32MB\nmax_slot_wal_keep_size = 16MB\n" + ) + primary.start() + primary.advance_wal(2) + primary.safe_psql("CHECKPOINT") + assert ( + primary.safe_psql( + "select invalidation_reason = 'wal_removed' from pg_replication_slots " + "where slot_name = 'test_slot';" + ) + == "t" + ), "test_slot invalidated due to wal_removed" + _wait_logical_decoding_disabled(primary) + _test_wal_level( + primary, + "replica|replica", + "effective_wal_level decreased to 'replica' after invalidation", + ) + primary.adjust_conf("max_slot_wal_keep_size", None) + primary.adjust_conf("min_wal_size", None) + primary.adjust_conf("max_wal_size", None) + primary.restart() + primary.safe_psql("select pg_drop_replication_slot('test_slot')") + 
primary.safe_psql( + "select pg_create_logical_replication_slot('test_slot', 'pgoutput')" + ) + + +def _invalidation_and_propagation(primary, create_pg): + primary.backup("my_backup") + standby1 = create_pg( + "standby1", from_backup=(primary, "my_backup"), has_streaming=True, start=False + ) + standby1.start() + primary.wait_for_replay_catchup(standby1) + standby1.create_logical_slot_on_standby(primary, "standby1_slot", "postgres") + standby1.promote() + _test_wal_level( + standby1, + "replica|logical", + "effective_wal_level remains 'logical' after promotion", + ) + standby1.safe_psql( + "select pg_create_logical_replication_slot('standby1_slot2', 'pgoutput')" + ) + standby1.stop() + standby2 = create_pg( + "standby2", from_backup=(primary, "my_backup"), has_streaming=True, start=False + ) + standby2.append_conf("wal_level = 'logical'") + standby2.start() + standby2.backup("my_backup3") + cascade = create_pg( + "cascade", from_backup=(standby2, "my_backup3"), has_streaming=True, start=False + ) + cascade.adjust_conf("wal_level", "replica") + cascade.start() + _test_wal_level(standby2, "logical|logical", "wal_levels on standby") + _test_wal_level(cascade, "replica|logical", "wal_levels on cascaded standby") + primary.safe_psql("select pg_drop_replication_slot('test_slot')") + _wait_logical_decoding_disabled(primary) + primary.wait_for_replay_catchup(standby2) + standby2.wait_for_replay_catchup(cascade, primary) + _test_wal_level(primary, "replica|replica", "effective_wal_level down on primary") + _test_wal_level(standby2, "logical|replica", "effective_wal_level down on standby") + _test_wal_level(cascade, "replica|replica", "effective_wal_level down on cascade") + standby2.promote() + standby2.wait_for_replay_catchup(cascade) + _test_wal_level( + cascade, + "replica|logical", + "effective_wal_level up on cascade after new primary is logical", + ) + standby2.stop() + cascade.stop() + _standby3_invalidation(primary, create_pg) + + +def 
_standby3_invalidation(primary, create_pg): + standby3 = create_pg( + "standby3", from_backup=(primary, "my_backup"), has_streaming=True, start=False + ) + standby3.start() + primary.safe_psql( + "select pg_create_logical_replication_slot('test_slot', 'pgoutput')" + ) + primary.wait_for_replay_catchup(standby3) + standby3.create_logical_slot_on_standby(primary, "standby3_slot", "postgres") + primary.safe_psql("select pg_drop_replication_slot('test_slot')") + _wait_logical_decoding_disabled(primary) + _test_wal_level( + primary, + "replica|replica", + "effective_wal_level down on primary to invalidate standby slots", + ) + assert standby3.poll_query_until( + "select invalidation_reason = 'wal_level_insufficient' " + "from pg_replication_slots where slot_name = 'standby3_slot';" + ) + standby3.stop() + primary.stop() diff --git a/src/test/recovery/pyt/test_052_checkpoint_segment_missing.py b/src/test/recovery/pyt/test_052_checkpoint_segment_missing.py new file mode 100644 index 0000000000000..04f243dec4d71 --- /dev/null +++ b/src/test/recovery/pyt/test_052_checkpoint_segment_missing.py @@ -0,0 +1,45 @@ +# Copyright (c) 2026, PostgreSQL Global Development Group + +"""Port of src/test/recovery/t/052_checkpoint_segment_missing.pl. + +Verify crash recovery behavior when the WAL segment containing the checkpoint +record referenced by pg_controldata is missing and there is no backup_label +file: the startup process should fail with a FATAL about the missing +checkpoint record. +""" + +import re + +from pypg import slurp_file + + +def test_checkpoint_segment_missing(pg_bin, create_pg): + """Recovery FATALs when the checkpoint WAL segment is gone (no backup_label).""" + node = create_pg("testnode", start=False) + node.append_conf("log_checkpoints = on") + node.start() + + # Force a checkpoint so pg_controldata points to a record we can target. 
+ node.safe_psql("CHECKPOINT;") + + checkpoint_walfile = node.safe_psql( + "SELECT pg_walfile_name(checkpoint_lsn) FROM pg_control_checkpoint()" + ) + assert checkpoint_walfile != "", "derived checkpoint WAL file name" + + node.stop("immediate") + + walpath = node.datadir / "pg_wal" / checkpoint_walfile + assert walpath.is_file(), "checkpoint WAL file exists before deletion" + walpath.unlink() + assert not walpath.exists(), "checkpoint WAL file removed" + + # Use pg_ctl directly (not node.start) because recovery is expected to fail. + pg_bin.result( + ["pg_ctl", "--pgdata", str(node.datadir), "--log", str(node.log), "start"] + ) + + assert re.search( + r"FATAL: .* could not locate a valid checkpoint record at .*", + slurp_file(node.log), + ), "FATAL logged for missing checkpoint record (no backup_label path)" diff --git a/src/test/recovery/pyt/test_053_standby_login_event_trigger.py b/src/test/recovery/pyt/test_053_standby_login_event_trigger.py new file mode 100644 index 0000000000000..d8ea90e70678a --- /dev/null +++ b/src/test/recovery/pyt/test_053_standby_login_event_trigger.py @@ -0,0 +1,76 @@ +# Copyright (c) 2026, PostgreSQL Global Development Group + +"""Port of src/test/recovery/t/053_standby_login_event_trigger.pl. + +Connecting to a standby still works after a login event trigger has been +created and dropped on the primary, leaving a dangling pg_database.dathasloginevt +flag that replicates to the standby. 
+""" + +import re + +_HASLOGINEVT = ( + "SELECT dathasloginevt FROM pg_database WHERE datname = 'regress_login_evt'" +) + + +def test_standby_login_event_trigger(create_pg): + """A standby tolerates a dangling dathasloginevt flag on a login to that DB.""" + primary = create_pg("primary", allows_streaming=True) + primary.backup("login_evt_backup") + standby = create_pg( + "standby", + from_backup=(primary, "login_evt_backup"), + has_streaming=True, + start=False, + ) + standby.start() + + # A dedicated database isolates the dangling flag from helpers that connect + # to "postgres". + primary.safe_psql("CREATE DATABASE regress_login_evt") + primary.wait_for_catchup(standby) + standby.safe_psql("SELECT 1", dbname="regress_login_evt") + + # Create then drop a login event trigger in that database; the flag stays + # set on disk until a later login on the primary clears it. + primary.safe_psql( + "CREATE FUNCTION init_session() RETURNS event_trigger " + "LANGUAGE plpgsql AS $$ BEGIN RAISE NOTICE 'init_session'; END $$;\n" + "CREATE EVENT TRIGGER init_session ON login " + "EXECUTE FUNCTION init_session();\n" + "ALTER EVENT TRIGGER init_session ENABLE ALWAYS;\n" + "DROP EVENT TRIGGER init_session;\n" + "DROP FUNCTION init_session();", + dbname="regress_login_evt", + ) + primary.wait_for_catchup(standby) + + assert ( + primary.safe_psql(_HASLOGINEVT) == "t" + ), "dathasloginevt remains set on primary after DROP EVENT TRIGGER" + assert ( + standby.safe_psql(_HASLOGINEVT) == "t" + ), "dathasloginevt replicated to standby" + + # A login to that DB on the standby must not try to clear the flag (which + # would need AccessExclusiveLock, forbidden during recovery). 
+ result = standby.psql_capture("SELECT 1", dbname="regress_login_evt") + assert result.rc == 0, "standby accepts connection to DB with dangling flag" + assert not re.search( + r"cannot acquire lock mode AccessExclusiveLock", result.stderr + ), "no AccessExclusiveLock FATAL on standby login" + + # A login on the primary clears the flag in place. + primary.safe_psql("SELECT 1", dbname="regress_login_evt") + assert ( + primary.safe_psql(_HASLOGINEVT) == "f" + ), "primary clears dathasloginevt on next login after DROP" + + # The in-place update isn't auto-flushed; force a flush so it reaches the + # standby. + primary.safe_psql("SELECT pg_switch_wal()") + primary.wait_for_catchup(standby) + assert ( + standby.safe_psql(_HASLOGINEVT) == "f" + ), "cleared dathasloginevt replicates to standby" From 0d8de31a33558f9c9c72d10715eb2fa9905c9c4c Mon Sep 17 00:00:00 2001 From: Greg Burd Date: Sun, 14 Jun 2026 11:16:33 -0400 Subject: [PATCH 14/36] pytest: port the src/test/subscription TAP suite Port the logical-replication subscription TAP tests (rep changes, sync, DDL, streaming, two-phase, column lists, sequences, conflicts, run-as-owner) to pytest. 
Co-authored-by: Greg Burd --- src/test/subscription/Makefile | 1 + src/test/subscription/meson.build | 47 ++ .../subscription/pyt/test_001_rep_changes.py | 362 ++++++++++++ src/test/subscription/pyt/test_002_types.py | 63 +++ .../subscription/pyt/test_003_constraints.py | 112 ++++ src/test/subscription/pyt/test_004_sync.py | 103 ++++ .../subscription/pyt/test_005_encoding.py | 40 ++ src/test/subscription/pyt/test_006_rewrite.py | 50 ++ src/test/subscription/pyt/test_007_ddl.py | 134 +++++ .../subscription/pyt/test_008_diff_schema.py | 96 ++++ .../subscription/pyt/test_009_matviews.py | 37 ++ .../subscription/pyt/test_010_truncate.py | 138 +++++ .../subscription/pyt/test_011_generated.py | 221 ++++++++ .../subscription/pyt/test_012_collation.py | 68 +++ .../subscription/pyt/test_013_partition.py | 517 ++++++++++++++++++ src/test/subscription/pyt/test_014_binary.py | 177 ++++++ src/test/subscription/pyt/test_015_stream.py | 220 ++++++++ .../pyt/test_016_stream_subxact.py | 103 ++++ .../subscription/pyt/test_017_stream_ddl.py | 100 ++++ .../pyt/test_018_stream_subxact_abort.py | 169 ++++++ .../pyt/test_019_stream_subxact_ddl_abort.py | 58 ++ .../subscription/pyt/test_020_messages.py | 124 +++++ .../subscription/pyt/test_021_twophase.py | 245 +++++++++ .../pyt/test_022_twophase_cascade.py | 213 ++++++++ .../pyt/test_023_twophase_stream.py | 218 ++++++++ .../subscription/pyt/test_024_add_drop_pub.py | 81 +++ .../pyt/test_025_rep_changes_for_schema.py | 136 +++++ src/test/subscription/pyt/test_026_stats.py | 222 ++++++++ .../subscription/pyt/test_027_nosuperuser.py | 295 ++++++++++ .../subscription/pyt/test_028_row_filter.py | 499 +++++++++++++++++ .../subscription/pyt/test_029_on_error.py | 132 +++++ src/test/subscription/pyt/test_030_origin.py | 215 ++++++++ .../subscription/pyt/test_031_column_list.py | 505 +++++++++++++++++ .../pyt/test_032_subscribe_use_index.py | 302 ++++++++++ .../pyt/test_033_run_as_table_owner.py | 189 +++++++ 
.../subscription/pyt/test_034_temporal.py | 466 ++++++++++++++++ .../subscription/pyt/test_035_conflicts.py | 454 +++++++++++++++ .../subscription/pyt/test_036_sequences.py | 136 +++++ src/test/subscription/pyt/test_037_except.py | 166 ++++++ .../pyt/test_038_walsnd_shutdown_timeout.py | 169 ++++++ src/test/subscription/pyt/test_100_bugs.py | 425 ++++++++++++++ 41 files changed, 8008 insertions(+) create mode 100644 src/test/subscription/pyt/test_001_rep_changes.py create mode 100644 src/test/subscription/pyt/test_002_types.py create mode 100644 src/test/subscription/pyt/test_003_constraints.py create mode 100644 src/test/subscription/pyt/test_004_sync.py create mode 100644 src/test/subscription/pyt/test_005_encoding.py create mode 100644 src/test/subscription/pyt/test_006_rewrite.py create mode 100644 src/test/subscription/pyt/test_007_ddl.py create mode 100644 src/test/subscription/pyt/test_008_diff_schema.py create mode 100644 src/test/subscription/pyt/test_009_matviews.py create mode 100644 src/test/subscription/pyt/test_010_truncate.py create mode 100644 src/test/subscription/pyt/test_011_generated.py create mode 100644 src/test/subscription/pyt/test_012_collation.py create mode 100644 src/test/subscription/pyt/test_013_partition.py create mode 100644 src/test/subscription/pyt/test_014_binary.py create mode 100644 src/test/subscription/pyt/test_015_stream.py create mode 100644 src/test/subscription/pyt/test_016_stream_subxact.py create mode 100644 src/test/subscription/pyt/test_017_stream_ddl.py create mode 100644 src/test/subscription/pyt/test_018_stream_subxact_abort.py create mode 100644 src/test/subscription/pyt/test_019_stream_subxact_ddl_abort.py create mode 100644 src/test/subscription/pyt/test_020_messages.py create mode 100644 src/test/subscription/pyt/test_021_twophase.py create mode 100644 src/test/subscription/pyt/test_022_twophase_cascade.py create mode 100644 src/test/subscription/pyt/test_023_twophase_stream.py create mode 100644 
src/test/subscription/pyt/test_024_add_drop_pub.py create mode 100644 src/test/subscription/pyt/test_025_rep_changes_for_schema.py create mode 100644 src/test/subscription/pyt/test_026_stats.py create mode 100644 src/test/subscription/pyt/test_027_nosuperuser.py create mode 100644 src/test/subscription/pyt/test_028_row_filter.py create mode 100644 src/test/subscription/pyt/test_029_on_error.py create mode 100644 src/test/subscription/pyt/test_030_origin.py create mode 100644 src/test/subscription/pyt/test_031_column_list.py create mode 100644 src/test/subscription/pyt/test_032_subscribe_use_index.py create mode 100644 src/test/subscription/pyt/test_033_run_as_table_owner.py create mode 100644 src/test/subscription/pyt/test_034_temporal.py create mode 100644 src/test/subscription/pyt/test_035_conflicts.py create mode 100644 src/test/subscription/pyt/test_036_sequences.py create mode 100644 src/test/subscription/pyt/test_037_except.py create mode 100644 src/test/subscription/pyt/test_038_walsnd_shutdown_timeout.py create mode 100644 src/test/subscription/pyt/test_100_bugs.py diff --git a/src/test/subscription/Makefile b/src/test/subscription/Makefile index 1b22703dc21cd..4a5c1ce57ab05 100644 --- a/src/test/subscription/Makefile +++ b/src/test/subscription/Makefile @@ -21,6 +21,7 @@ export enable_injection_points check: $(prove_check) + $(pytest_check) installcheck: $(prove_installcheck) diff --git a/src/test/subscription/meson.build b/src/test/subscription/meson.build index e71e95c6297eb..4dcdc69f5464e 100644 --- a/src/test/subscription/meson.build +++ b/src/test/subscription/meson.build @@ -51,4 +51,51 @@ tests += { 't/100_bugs.pl', ], }, + 'pytest': { + 'env': { + 'enable_injection_points': get_option('injection_points') ? 'yes' : 'no', + 'with_icu': icu.found() ? 
'yes' : 'no', + }, + 'tests': [ + 'pyt/test_002_types.py', + 'pyt/test_001_rep_changes.py', + 'pyt/test_003_constraints.py', + 'pyt/test_004_sync.py', + 'pyt/test_005_encoding.py', + 'pyt/test_006_rewrite.py', + 'pyt/test_007_ddl.py', + 'pyt/test_008_diff_schema.py', + 'pyt/test_009_matviews.py', + 'pyt/test_010_truncate.py', + 'pyt/test_011_generated.py', + 'pyt/test_012_collation.py', + 'pyt/test_013_partition.py', + 'pyt/test_014_binary.py', + 'pyt/test_016_stream_subxact.py', + 'pyt/test_017_stream_ddl.py', + 'pyt/test_018_stream_subxact_abort.py', + 'pyt/test_019_stream_subxact_ddl_abort.py', + 'pyt/test_020_messages.py', + 'pyt/test_021_twophase.py', + 'pyt/test_022_twophase_cascade.py', + 'pyt/test_023_twophase_stream.py', + 'pyt/test_024_add_drop_pub.py', + 'pyt/test_027_nosuperuser.py', + 'pyt/test_028_row_filter.py', + 'pyt/test_029_on_error.py', + 'pyt/test_030_origin.py', + 'pyt/test_031_column_list.py', + 'pyt/test_032_subscribe_use_index.py', + 'pyt/test_033_run_as_table_owner.py', + 'pyt/test_034_temporal.py', + 'pyt/test_036_sequences.py', + 'pyt/test_037_except.py', + 'pyt/test_025_rep_changes_for_schema.py', + 'pyt/test_026_stats.py', + 'pyt/test_038_walsnd_shutdown_timeout.py', + 'pyt/test_015_stream.py', + 'pyt/test_035_conflicts.py', + 'pyt/test_100_bugs.py', + ], + }, } diff --git a/src/test/subscription/pyt/test_001_rep_changes.py b/src/test/subscription/pyt/test_001_rep_changes.py new file mode 100644 index 0000000000000..8a56d678dcea8 --- /dev/null +++ b/src/test/subscription/pyt/test_001_rep_changes.py @@ -0,0 +1,362 @@ +# Copyright (c) 2022-2026, PostgreSQL Global Development Group + +# pylint: disable=line-too-long,too-many-statements +# pylint: disable=line-too-long,too-many-statements +"""Port of src/test/subscription/t/001_rep_changes.pl. 
+ +Core logical-replication change propagation: initial table sync, incremental +INSERT/UPDATE/DELETE across replica-identity variants, REPLICA IDENTITY +NOTHING/FULL, included-column indexes, no-column tables, ALTER PUBLICATION +ADD/DROP TABLE, multiple publications, CONNECTION-string options reaching the +walsender (log_statement_stats -> QUERY STATISTICS), and the wal_level=minimal +CREATE PUBLICATION warning. Generated from the Perl original via +.agent/gen_golden.py with three procedural spots hand-finished. +""" + +import re + + +def test_001_rep_changes(create_pg): + """Generated golden port of 001_rep_changes.""" + node_publisher = create_pg("publisher", allows_streaming="logical", start=False) + node_publisher.start() + node_subscriber = create_pg("subscriber", start=False) + node_subscriber.start() + node_publisher.safe_psql( + "CREATE FUNCTION public.pg_get_replica_identity_index(int)\n\t RETURNS regclass LANGUAGE sql AS 'SELECT 1/0'" + ) + node_publisher.safe_psql( + "CREATE TABLE tab_notrep AS SELECT generate_series(1,10) AS a" + ) + node_publisher.safe_psql( + "CREATE TABLE tab_ins AS SELECT generate_series(1,1002) AS a" + ) + node_publisher.safe_psql( + "CREATE TABLE tab_full AS SELECT generate_series(1,10) AS a" + ) + node_publisher.safe_psql("CREATE TABLE tab_full2 (x text)") + node_publisher.safe_psql("INSERT INTO tab_full2 VALUES ('a'), ('b'), ('b')") + node_publisher.safe_psql("CREATE TABLE tab_rep (a int primary key)") + node_publisher.safe_psql( + "CREATE TABLE tab_mixed (a int primary key, b text, c numeric)" + ) + node_publisher.safe_psql("INSERT INTO tab_mixed (a, b, c) VALUES (1, 'foo', 1.1)") + node_publisher.safe_psql( + "CREATE TABLE tab_include (a int, b text, CONSTRAINT covering PRIMARY KEY(a) INCLUDE(b))" + ) + node_publisher.safe_psql("CREATE TABLE tab_full_pk (a int primary key, b text)") + node_publisher.safe_psql("ALTER TABLE tab_full_pk REPLICA IDENTITY FULL") + node_publisher.safe_psql("CREATE TABLE tab_nothing (a int)") + 
node_publisher.safe_psql("ALTER TABLE tab_nothing REPLICA IDENTITY NOTHING") + node_publisher.safe_psql("CREATE TABLE tab_no_replidentity_index(c1 int)") + node_publisher.safe_psql( + "CREATE INDEX idx_no_replidentity_index ON tab_no_replidentity_index(c1)" + ) + node_publisher.safe_psql("CREATE TABLE tab_no_col()") + node_publisher.safe_psql("INSERT INTO tab_no_col default VALUES") + node_subscriber.safe_psql("CREATE TABLE tab_notrep (a int)") + node_subscriber.safe_psql("CREATE TABLE tab_ins (a int)") + node_subscriber.safe_psql("CREATE TABLE tab_full (a int)") + node_subscriber.safe_psql("CREATE TABLE tab_full2 (x text)") + node_subscriber.safe_psql("CREATE TABLE tab_rep (a int primary key)") + node_subscriber.safe_psql("CREATE TABLE tab_full_pk (a int primary key, b text)") + node_subscriber.safe_psql("ALTER TABLE tab_full_pk REPLICA IDENTITY FULL") + node_subscriber.safe_psql("CREATE TABLE tab_nothing (a int)") + node_subscriber.safe_psql( + "CREATE TABLE tab_mixed (d text default 'local', c numeric, b text, a int primary key)" + ) + node_subscriber.safe_psql( + "CREATE TABLE tab_include (a int, b text, CONSTRAINT covering PRIMARY KEY(a) INCLUDE(b))" + ) + node_subscriber.safe_psql("CREATE TABLE tab_no_replidentity_index(c1 int)") + node_subscriber.safe_psql( + "CREATE INDEX idx_no_replidentity_index ON tab_no_replidentity_index(c1)" + ) + node_subscriber.safe_psql("CREATE TABLE tab_no_col()") + publisher_connstr = node_publisher.connstr() + " dbname=postgres" + node_publisher.safe_psql("CREATE PUBLICATION tap_pub") + node_publisher.safe_psql( + "CREATE PUBLICATION tap_pub_ins_only WITH (publish = insert)" + ) + node_publisher.safe_psql( + "ALTER PUBLICATION tap_pub ADD TABLE tab_rep, tab_full, tab_full2, tab_mixed, tab_include, tab_nothing, tab_full_pk, tab_no_replidentity_index, tab_no_col" + ) + node_publisher.safe_psql("ALTER PUBLICATION tap_pub_ins_only ADD TABLE tab_ins") + node_subscriber.safe_psql( + "CREATE SUBSCRIPTION tap_sub CONNECTION '" + + 
publisher_connstr + + "' PUBLICATION tap_pub, tap_pub_ins_only" + ) + node_subscriber.wait_for_subscription_sync() + node_publisher.safe_psql("SELECT pg_stat_reset_shared('io')") + result = node_subscriber.safe_psql("SELECT count(*) FROM tab_notrep") + assert result == "0", "check non-replicated table is empty on subscriber" + result = node_subscriber.safe_psql("SELECT count(*) FROM tab_ins") + assert result == "1002", "check initial data was copied to subscriber" + node_publisher.safe_psql("INSERT INTO tab_ins SELECT generate_series(1,50)") + node_publisher.safe_psql("DELETE FROM tab_ins WHERE a > 20") + node_publisher.safe_psql("UPDATE tab_ins SET a = -a") + node_publisher.safe_psql("INSERT INTO tab_rep SELECT generate_series(1,50)") + node_publisher.safe_psql("DELETE FROM tab_rep WHERE a > 20") + node_publisher.safe_psql("UPDATE tab_rep SET a = -a") + node_publisher.safe_psql("INSERT INTO tab_mixed VALUES (2, 'bar', 2.2)") + node_publisher.safe_psql("INSERT INTO tab_full_pk VALUES (1, 'foo'), (2, 'baz')") + node_publisher.safe_psql("INSERT INTO tab_nothing VALUES (generate_series(1,20))") + node_publisher.safe_psql("INSERT INTO tab_include SELECT generate_series(1,50)") + node_publisher.safe_psql("DELETE FROM tab_include WHERE a > 20") + node_publisher.safe_psql("UPDATE tab_include SET a = -a") + node_publisher.safe_psql("INSERT INTO tab_no_replidentity_index VALUES(1)") + node_publisher.safe_psql("INSERT INTO tab_no_col default VALUES") + node_publisher.wait_for_catchup("tap_sub") + result = node_subscriber.safe_psql("SELECT count(*), min(a), max(a) FROM tab_ins") + assert result == "1052|1|1002", "check replicated inserts on subscriber" + result = node_subscriber.safe_psql("SELECT count(*), min(a), max(a) FROM tab_rep") + assert result == "20|-20|-1", "check replicated changes on subscriber" + result = node_subscriber.safe_psql("SELECT * FROM tab_mixed") + assert ( + result == "local|1.1|foo|1\nlocal|2.2|bar|2" + ), "check replicated changes with different 
column order" + result = node_subscriber.safe_psql("SELECT count(*) FROM tab_nothing") + assert result == "20", "check replicated changes with REPLICA IDENTITY NOTHING" + result = node_subscriber.safe_psql( + "SELECT count(*), min(a), max(a) FROM tab_include" + ) + assert ( + result == "20|-20|-1" + ), "check replicated changes with primary key index with included columns" + assert ( + node_subscriber.safe_psql("SELECT c1 FROM tab_no_replidentity_index") == "1" + ), "value replicated to subscriber without replica identity index" + result = node_subscriber.safe_psql("SELECT count(*) FROM tab_no_col") + assert result == "2", "check replicated changes for table having no columns" + assert node_publisher.poll_query_until( + "SELECT sum(reads) > 0\n FROM pg_catalog.pg_stat_io\n WHERE backend_type = 'walsender'\n AND object = 'wal'" + ), "Timed out while waiting for the walsender to update its IO statistics" + node_publisher.safe_psql("INSERT INTO tab_full SELECT generate_series(1,10)") + result = node_subscriber.safe_psql("SELECT count(*), min(a), max(a) FROM tab_ins") + assert ( + result == "1052|1|1002" + ), "check rows on subscriber before table drop from publication" + node_publisher.safe_psql("ALTER PUBLICATION tap_pub_ins_only DROP TABLE tab_ins") + node_publisher.safe_psql("INSERT INTO tab_ins VALUES(8888)") + node_publisher.wait_for_catchup("tap_sub") + result = node_subscriber.safe_psql("SELECT count(*), min(a), max(a) FROM tab_ins") + assert ( + result == "1052|1|1002" + ), "check rows on subscriber after table drop from publication" + node_publisher.safe_psql("DELETE FROM tab_ins WHERE a = 8888") + node_publisher.safe_psql("ALTER PUBLICATION tap_pub_ins_only ADD TABLE tab_ins") + node_subscriber.safe_psql("ALTER SUBSCRIPTION tap_sub REFRESH PUBLICATION") + node_publisher.safe_psql("CREATE TABLE temp1 (a int)") + node_publisher.safe_psql("CREATE TABLE temp2 (a int)") + node_subscriber.safe_psql("CREATE TABLE temp1 (a int)") + node_subscriber.safe_psql("CREATE 
TABLE temp2 (a int)") + node_publisher.safe_psql( + "CREATE PUBLICATION tap_pub_temp1 FOR TABLE temp1 WITH (publish = insert)" + ) + node_publisher.safe_psql("CREATE PUBLICATION tap_pub_temp2 FOR TABLE temp2") + node_subscriber.safe_psql( + "CREATE SUBSCRIPTION tap_sub_temp1 CONNECTION '" + + publisher_connstr + + "' PUBLICATION tap_pub_temp1, tap_pub_temp2" + ) + node_subscriber.wait_for_subscription_sync() + result = node_subscriber.safe_psql("SELECT count(*) FROM temp1") + assert result == "0", "check initial rows on subscriber with multiple publications" + node_publisher.safe_psql("INSERT INTO temp1 VALUES (1)") + node_publisher.wait_for_catchup("tap_sub_temp1") + result = node_subscriber.safe_psql("SELECT count(*) FROM temp1") + assert result == "1", "check rows on subscriber with multiple publications" + node_subscriber.safe_psql("DROP SUBSCRIPTION tap_sub_temp1") + node_publisher.safe_psql("DROP PUBLICATION tap_pub_temp1") + node_publisher.safe_psql("DROP PUBLICATION tap_pub_temp2") + node_publisher.safe_psql("DROP TABLE temp1") + node_publisher.safe_psql("DROP TABLE temp2") + node_subscriber.safe_psql("DROP TABLE temp1") + node_subscriber.safe_psql("DROP TABLE temp2") + node_publisher.safe_psql("ALTER TABLE tab_full REPLICA IDENTITY FULL") + node_subscriber.safe_psql("ALTER TABLE tab_full REPLICA IDENTITY FULL") + node_publisher.safe_psql("ALTER TABLE tab_full2 REPLICA IDENTITY FULL") + node_subscriber.safe_psql("ALTER TABLE tab_full2 REPLICA IDENTITY FULL") + node_publisher.safe_psql("ALTER TABLE tab_ins REPLICA IDENTITY FULL") + node_subscriber.safe_psql("ALTER TABLE tab_ins REPLICA IDENTITY FULL") + node_publisher.safe_psql("UPDATE tab_full SET a = a * a") + node_publisher.safe_psql("UPDATE tab_full2 SET x = 'bb' WHERE x = 'b'") + node_publisher.safe_psql("UPDATE tab_mixed SET b = 'baz' WHERE a = 1") + node_publisher.safe_psql("UPDATE tab_full_pk SET b = 'bar' WHERE a = 1") + node_publisher.wait_for_catchup("tap_sub") + result = 
node_subscriber.safe_psql("SELECT count(*), min(a), max(a) FROM tab_full") + assert ( + result == "20|1|100" + ), "update works with REPLICA IDENTITY FULL and duplicate tuples" + result = node_subscriber.safe_psql("SELECT x FROM tab_full2 ORDER BY 1") + assert ( + result == "a\nbb\nbb" + ), "update works with REPLICA IDENTITY FULL and text datums" + result = node_subscriber.safe_psql("SELECT * FROM tab_mixed ORDER BY a") + assert ( + result == "local|1.1|baz|1\nlocal|2.2|bar|2" + ), "update works with different column order and subscriber local values" + result = node_subscriber.safe_psql("SELECT * FROM tab_full_pk ORDER BY a") + assert ( + result == "1|bar\n2|baz" + ), "update works with REPLICA IDENTITY FULL and a primary key" + node_subscriber.safe_psql("DELETE FROM tab_full_pk") + node_subscriber.safe_psql("DELETE FROM tab_full WHERE a = 25") + log_location_pub = node_publisher.current_log_position() + log_location_sub = node_subscriber.current_log_position() + node_publisher.safe_psql("UPDATE tab_full_pk SET b = 'quux' WHERE a = 1") + node_publisher.safe_psql("UPDATE tab_full SET a = a + 1 WHERE a = 25") + node_publisher.safe_psql("DELETE FROM tab_full_pk WHERE a = 2") + node_publisher.wait_for_catchup("tap_sub") + assert node_subscriber.log_matches( + r"""conflict detected on relation "public.tab_full_pk": conflict=update_missing.*\n.*DETAIL:.* Could not find the row to be updated: remote row \(1, quux\), replica identity \(a\)=\(1\)""", + log_location_sub, + ), "update target row is missing" + assert node_subscriber.log_matches( + r"""conflict detected on relation "public.tab_full": conflict=update_missing.*\n.*DETAIL:.* Could not find the row to be updated: remote row \(26\), replica identity full \(25\)""", + log_location_sub, + ), "update target row is missing" + assert node_subscriber.log_matches( + r"""conflict detected on relation "public.tab_full_pk": conflict=delete_missing.*\n.*DETAIL:.* Could not find the row to be deleted: replica identity 
\(a\)=\(2\)""", + log_location_sub, + ), "delete target row is missing" + node_subscriber.append_conf("log_min_messages = warning") + node_subscriber.reload() + node_publisher.safe_psql( + "UPDATE tab_mixed SET b = repeat('xyzzy', 100000) WHERE a = 2" + ) + node_publisher.wait_for_catchup("tap_sub") + result = node_subscriber.safe_psql( + "SELECT a, length(b), c, d FROM tab_mixed ORDER BY a" + ) + assert ( + result == "1|3|1.1|local\n2|500000|2.2|local" + ), "update transmits large column value" + node_publisher.safe_psql("UPDATE tab_mixed SET c = 3.3 WHERE a = 2") + node_publisher.wait_for_catchup("tap_sub") + result = node_subscriber.safe_psql( + "SELECT a, length(b), c, d FROM tab_mixed ORDER BY a" + ) + assert ( + result == "1|3|1.1|local\n2|500000|3.3|local" + ), "update with non-transmitted large column value" + node_publisher.safe_psql("UPDATE tab_mixed SET b = 'bar', c = 2.2 WHERE a = 2") + node_publisher.safe_psql("ALTER TABLE tab_mixed DROP COLUMN b") + node_publisher.safe_psql("UPDATE tab_mixed SET c = 11.11 WHERE a = 1") + node_publisher.wait_for_catchup("tap_sub") + result = node_subscriber.safe_psql("SELECT * FROM tab_mixed ORDER BY a") + assert ( + result == "local|11.11|baz|1\nlocal|2.2|bar|2" + ), "update works with dropped publisher column" + node_subscriber.safe_psql("ALTER TABLE tab_mixed DROP COLUMN d") + node_publisher.safe_psql("UPDATE tab_mixed SET c = 22.22 WHERE a = 2") + node_publisher.wait_for_catchup("tap_sub") + result = node_subscriber.safe_psql("SELECT * FROM tab_mixed ORDER BY a") + assert ( + result == "11.11|baz|1\n22.22|bar|2" + ), "update works with dropped subscriber column" + assert not node_publisher.log_matches( + r"""QUERY STATISTICS""", + log_location_pub, + ), "log_statement_stats has not been enabled yet" + log_location_pub = node_publisher.current_log_position() + oldpid = node_publisher.safe_psql( + "SELECT pid FROM pg_stat_replication WHERE application_name = 'tap_sub' AND state = 'streaming';" + ) + 
node_subscriber.safe_psql( + "ALTER SUBSCRIPTION tap_sub CONNECTION '" + + publisher_connstr + + " options=''-c log_statement_stats=on'''" + ) + assert node_publisher.poll_query_until( + "SELECT pid != " + + str(oldpid) + + " FROM pg_stat_replication WHERE application_name = 'tap_sub' AND state = 'streaming';" + ), "Timed out while waiting for apply to restart after changing CONNECTION" + assert node_publisher.wait_for_log( + r"QUERY STATISTICS", log_location_pub + ), "log_statement_stats in CONNECTION string had effect on publisher's walsender" + oldpid = node_publisher.safe_psql( + "SELECT pid FROM pg_stat_replication WHERE application_name = 'tap_sub' AND state = 'streaming';" + ) + node_subscriber.safe_psql( + "ALTER SUBSCRIPTION tap_sub SET PUBLICATION tap_pub_ins_only WITH (copy_data = false)" + ) + assert node_publisher.poll_query_until( + "SELECT pid != " + + str(oldpid) + + " FROM pg_stat_replication WHERE application_name = 'tap_sub' AND state = 'streaming';" + ), "Timed out while waiting for apply to restart after changing PUBLICATION" + node_publisher.safe_psql("INSERT INTO tab_ins SELECT generate_series(1001,1100)") + node_publisher.safe_psql("DELETE FROM tab_rep") + node_publisher.stop("fast") + node_publisher.start() + node_publisher.wait_for_catchup("tap_sub") + result = node_subscriber.safe_psql("SELECT count(*), min(a), max(a) FROM tab_ins") + assert ( + result == "1152|1|1100" + ), "check replicated inserts after subscription publication change" + result = node_subscriber.safe_psql("SELECT count(*), min(a), max(a) FROM tab_rep") + assert ( + result == "20|-20|-1" + ), "check changes skipped after subscription publication change" + node_publisher.safe_psql( + "ALTER PUBLICATION tap_pub_ins_only SET (publish = 'insert, delete')" + ) + node_publisher.safe_psql("ALTER PUBLICATION tap_pub_ins_only ADD TABLE tab_full") + node_publisher.safe_psql("DELETE FROM tab_ins WHERE a > 0") + node_subscriber.safe_psql( + "ALTER SUBSCRIPTION tap_sub REFRESH 
PUBLICATION WITH (copy_data = false)" + ) + node_publisher.safe_psql("INSERT INTO tab_full VALUES(0)") + node_publisher.wait_for_catchup("tap_sub") + node_publisher.append_conf("log_min_messages = debug1") + node_publisher.reload() + log_location_pub = node_publisher.current_log_position() + node_publisher.safe_psql("INSERT INTO tab_notrep VALUES (11)") + node_publisher.wait_for_catchup("tap_sub") + assert node_publisher.log_matches( + r"""skipped replication of an empty transaction with XID""", + log_location_pub, + ), "empty transaction is skipped" + result = node_subscriber.safe_psql("SELECT count(*) FROM tab_notrep") + assert result == "0", "check non-replicated table is empty on subscriber" + node_publisher.append_conf("log_min_messages = warning") + node_publisher.reload() + result = node_subscriber.safe_psql("SELECT count(*), min(a), max(a) FROM tab_ins") + assert result == "1052|1|1002", "check replicated deletes after alter publication" + result = node_subscriber.safe_psql("SELECT count(*), min(a), max(a) FROM tab_full") + assert result == "19|0|100", "check replicated insert after alter publication" + oldpid = node_publisher.safe_psql( + "SELECT pid FROM pg_stat_replication WHERE application_name = 'tap_sub' AND state = 'streaming';" + ) + node_subscriber.safe_psql("ALTER SUBSCRIPTION tap_sub RENAME TO tap_sub_renamed") + assert node_publisher.poll_query_until( + "SELECT pid != " + + str(oldpid) + + " FROM pg_stat_replication WHERE application_name = 'tap_sub_renamed' AND state = 'streaming';" + ), "Timed out while waiting for apply to restart after renaming SUBSCRIPTION" + node_subscriber.safe_psql("DROP SUBSCRIPTION tap_sub_renamed") + result = node_subscriber.safe_psql("SELECT count(*) FROM pg_subscription") + assert result == "0", "check subscription was dropped on subscriber" + result = node_publisher.safe_psql("SELECT count(*) FROM pg_replication_slots") + assert result == "0", "check replication slot was dropped on publisher" + result = 
node_subscriber.safe_psql("SELECT count(*) FROM pg_subscription_rel") + assert result == "0", "check subscription relation status was dropped on subscriber" + result = node_publisher.safe_psql("SELECT count(*) FROM pg_replication_slots") + assert result == "0", "check replication slot was dropped on publisher" + result = node_subscriber.safe_psql("SELECT count(*) FROM pg_replication_origin") + assert result == "0", "check replication origin was dropped on subscriber" + node_subscriber.stop("fast") + node_publisher.stop("fast") + node_publisher.append_conf("\nwal_level=minimal\nmax_wal_senders=0\n") + node_publisher.start() + result = node_publisher.psql_capture( + "BEGIN;\nCREATE TABLE skip_wal();\n" + "CREATE PUBLICATION tap_pub2 FOR TABLE skip_wal;\nROLLBACK;" + ) + assert re.search( + r"WARNING: logical decoding must be enabled to publish logical changes", + result.stderr, + ), 'CREATE PUBLICATION while "wal_level=minimal"' diff --git a/src/test/subscription/pyt/test_002_types.py b/src/test/subscription/pyt/test_002_types.py new file mode 100644 index 0000000000000..24c27a61e9a50 --- /dev/null +++ b/src/test/subscription/pyt/test_002_types.py @@ -0,0 +1,63 @@ +# Copyright (c) 2021-2026, PostgreSQL Global Development Group + +"""Port of src/test/subscription/t/002_types.pl. + +More complex datatypes (arrays, enums, composites, ranges, hstore, domains) +are replicated correctly by logical replication. The SQL fixtures and the +golden expected outputs are reproduced verbatim from the Perl original. 
+""" + +# pylint: disable=line-too-long # golden replication SQL fixtures and outputs +_DDL = "\n\tCREATE EXTENSION hstore WITH SCHEMA public;\n\tCREATE TABLE public.tst_one_array (\n\t\ta INTEGER PRIMARY KEY,\n\t\tb INTEGER[]\n\t\t);\n\tCREATE TABLE public.tst_arrays (\n\t\ta INTEGER[] PRIMARY KEY,\n\t\tb TEXT[],\n\t\tc FLOAT[],\n\t\td INTERVAL[]\n\t\t);\n\n\tCREATE TYPE public.tst_enum_t AS ENUM ('a', 'b', 'c', 'd', 'e');\n\tCREATE TABLE public.tst_one_enum (\n\t\ta INTEGER PRIMARY KEY,\n\t\tb public.tst_enum_t\n\t\t);\n\tCREATE TABLE public.tst_enums (\n\t\ta public.tst_enum_t PRIMARY KEY,\n\t\tb public.tst_enum_t[]\n\t\t);\n\n\tCREATE TYPE public.tst_comp_basic_t AS (a FLOAT, b TEXT, c INTEGER);\n\tCREATE TYPE public.tst_comp_enum_t AS (a FLOAT, b public.tst_enum_t, c INTEGER);\n\tCREATE TYPE public.tst_comp_enum_array_t AS (a FLOAT, b public.tst_enum_t[], c INTEGER);\n\tCREATE TABLE public.tst_one_comp (\n\t\ta INTEGER PRIMARY KEY,\n\t\tb public.tst_comp_basic_t\n\t\t);\n\tCREATE TABLE public.tst_comps (\n\t\ta public.tst_comp_basic_t PRIMARY KEY,\n\t\tb public.tst_comp_basic_t[]\n\t\t);\n\tCREATE TABLE public.tst_comp_enum (\n\t\ta INTEGER PRIMARY KEY,\n\t\tb public.tst_comp_enum_t\n\t\t);\n\tCREATE TABLE public.tst_comp_enum_array (\n\t\ta public.tst_comp_enum_t PRIMARY KEY,\n\t\tb public.tst_comp_enum_t[]\n\t\t);\n\tCREATE TABLE public.tst_comp_one_enum_array (\n\t\ta INTEGER PRIMARY KEY,\n\t\tb public.tst_comp_enum_array_t\n\t\t);\n\tCREATE TABLE public.tst_comp_enum_what (\n\t\ta public.tst_comp_enum_array_t PRIMARY KEY,\n\t\tb public.tst_comp_enum_array_t[]\n\t\t);\n\n\tCREATE TYPE public.tst_comp_mix_t AS (\n\t\ta public.tst_comp_basic_t,\n\t\tb public.tst_comp_basic_t[],\n\t\tc public.tst_enum_t,\n\t\td public.tst_enum_t[]\n\t\t);\n\tCREATE TABLE public.tst_comp_mix_array (\n\t\ta public.tst_comp_mix_t PRIMARY KEY,\n\t\tb public.tst_comp_mix_t[]\n\t\t);\n\tCREATE TABLE public.tst_range (\n\t\ta INTEGER PRIMARY KEY,\n\t\tb int4range\n\t);\n\tCREATE 
TABLE public.tst_range_array (\n\t\ta INTEGER PRIMARY KEY,\n\t\tb TSTZRANGE,\n\t\tc int8range[]\n\t);\n\tCREATE TABLE public.tst_hstore (\n\t\ta INTEGER PRIMARY KEY,\n\t\tb public.hstore\n\t);\n\n\tSET check_function_bodies=off;\n\tCREATE FUNCTION public.monot_incr(int) RETURNS bool LANGUAGE sql\n\t\tAS ' select $1 > max(a) from public.tst_dom_constr; ';\n\tCREATE DOMAIN monot_int AS int CHECK (monot_incr(VALUE));\n\tCREATE TABLE public.tst_dom_constr (a monot_int);" +_INSERT = "\n\t-- test_tbl_one_array_col\n\tINSERT INTO tst_one_array (a, b) VALUES\n\t\t(1, '{1, 2, 3}'),\n\t\t(2, '{2, 3, 1}'),\n\t\t(3, '{3, 2, 1}'),\n\t\t(4, '{4, 3, 2}'),\n\t\t(5, '{5, NULL, 3}');\n\n\t-- test_tbl_arrays\n\tINSERT INTO tst_arrays (a, b, c, d) VALUES\n\t\t('{1, 2, 3}', '{\"a\", \"b\", \"c\"}', '{1.1, 2.2, 3.3}', '{\"1 day\", \"2 days\", \"3 days\"}'),\n\t\t('{2, 3, 1}', '{\"b\", \"c\", \"a\"}', '{2.2, 3.3, 1.1}', '{\"2 minutes\", \"3 minutes\", \"1 minute\"}'),\n\t\t('{3, 1, 2}', '{\"c\", \"a\", \"b\"}', '{3.3, 1.1, 2.2}', '{\"3 years\", \"1 year\", \"2 years\"}'),\n\t\t('{4, 1, 2}', '{\"d\", \"a\", \"b\"}', '{4.4, 1.1, 2.2}', '{\"4 years\", \"1 year\", \"2 years\"}'),\n\t\t('{5, NULL, NULL}', '{\"e\", NULL, \"b\"}', '{5.5, 1.1, NULL}', '{\"5 years\", NULL, NULL}');\n\n\t-- test_tbl_single_enum\n\tINSERT INTO tst_one_enum (a, b) VALUES\n\t\t(1, 'a'),\n\t\t(2, 'b'),\n\t\t(3, 'c'),\n\t\t(4, 'd'),\n\t\t(5, NULL);\n\n\t-- test_tbl_enums\n\tINSERT INTO tst_enums (a, b) VALUES\n\t\t('a', '{b, c}'),\n\t\t('b', '{c, a}'),\n\t\t('c', '{b, a}'),\n\t\t('d', '{c, b}'),\n\t\t('e', '{d, NULL}');\n\n\t-- test_tbl_single_composites\n\tINSERT INTO tst_one_comp (a, b) VALUES\n\t\t(1, ROW(1.0, 'a', 1)),\n\t\t(2, ROW(2.0, 'b', 2)),\n\t\t(3, ROW(3.0, 'c', 3)),\n\t\t(4, ROW(4.0, 'd', 4)),\n\t\t(5, ROW(NULL, NULL, 5));\n\n\t-- test_tbl_composites\n\tINSERT INTO tst_comps (a, b) VALUES\n\t\t(ROW(1.0, 'a', 1), ARRAY[ROW(1, 'a', 1)::tst_comp_basic_t]),\n\t\t(ROW(2.0, 'b', 2), ARRAY[ROW(2, 'b', 
2)::tst_comp_basic_t]),\n\t\t(ROW(3.0, 'c', 3), ARRAY[ROW(3, 'c', 3)::tst_comp_basic_t]),\n\t\t(ROW(4.0, 'd', 4), ARRAY[ROW(4, 'd', 3)::tst_comp_basic_t]),\n\t\t(ROW(5.0, 'e', NULL), ARRAY[NULL, ROW(5, NULL, 5)::tst_comp_basic_t]);\n\n\t-- test_tbl_composite_with_enums\n\tINSERT INTO tst_comp_enum (a, b) VALUES\n\t\t(1, ROW(1.0, 'a', 1)),\n\t\t(2, ROW(2.0, 'b', 2)),\n\t\t(3, ROW(3.0, 'c', 3)),\n\t\t(4, ROW(4.0, 'd', 4)),\n\t\t(5, ROW(NULL, 'e', NULL));\n\n\t-- test_tbl_composite_with_enums_array\n\tINSERT INTO tst_comp_enum_array (a, b) VALUES\n\t\t(ROW(1.0, 'a', 1), ARRAY[ROW(1, 'a', 1)::tst_comp_enum_t]),\n\t\t(ROW(2.0, 'b', 2), ARRAY[ROW(2, 'b', 2)::tst_comp_enum_t]),\n\t\t(ROW(3.0, 'c', 3), ARRAY[ROW(3, 'c', 3)::tst_comp_enum_t]),\n\t\t(ROW(4.0, 'd', 3), ARRAY[ROW(3, 'd', 3)::tst_comp_enum_t]),\n\t\t(ROW(5.0, 'e', 3), ARRAY[ROW(3, 'e', 3)::tst_comp_enum_t, NULL]);\n\n\t-- test_tbl_composite_with_single_enums_array_in_composite\n\tINSERT INTO tst_comp_one_enum_array (a, b) VALUES\n\t\t(1, ROW(1.0, '{a, b, c}', 1)),\n\t\t(2, ROW(2.0, '{a, b, c}', 2)),\n\t\t(3, ROW(3.0, '{a, b, c}', 3)),\n\t\t(4, ROW(4.0, '{c, b, d}', 4)),\n\t\t(5, ROW(5.0, '{NULL, e, NULL}', 5));\n\n\t-- test_tbl_composite_with_enums_array_in_composite\n\tINSERT INTO tst_comp_enum_what (a, b) VALUES\n\t\t(ROW(1.0, '{a, b, c}', 1), ARRAY[ROW(1, '{a, b, c}', 1)::tst_comp_enum_array_t]),\n\t\t(ROW(2.0, '{b, c, a}', 2), ARRAY[ROW(2, '{b, c, a}', 1)::tst_comp_enum_array_t]),\n\t\t(ROW(3.0, '{c, a, b}', 1), ARRAY[ROW(3, '{c, a, b}', 1)::tst_comp_enum_array_t]),\n\t\t(ROW(4.0, '{c, b, d}', 4), ARRAY[ROW(4, '{c, b, d}', 4)::tst_comp_enum_array_t]),\n\t\t(ROW(5.0, '{c, NULL, b}', NULL), ARRAY[ROW(5, '{c, e, b}', 1)::tst_comp_enum_array_t]);\n\n\t-- test_tbl_mixed_composites\n\tINSERT INTO tst_comp_mix_array (a, b) VALUES\n\t\t(ROW(\n\t\t\tROW(1,'a',1),\n\t\t\tARRAY[ROW(1,'a',1)::tst_comp_basic_t, 
ROW(2,'b',2)::tst_comp_basic_t],\n\t\t\t'a',\n\t\t\t'{a,b,NULL,c}'),\n\t\tARRAY[\n\t\t\tROW(\n\t\t\t\tROW(1,'a',1),\n\t\t\t\tARRAY[\n\t\t\t\t\tROW(1,'a',1)::tst_comp_basic_t,\n\t\t\t\t\tROW(2,'b',2)::tst_comp_basic_t,\n\t\t\t\t\tNULL\n\t\t\t\t\t],\n\t\t\t\t'a',\n\t\t\t\t'{a,b,c}'\n\t\t\t\t)::tst_comp_mix_t\n\t\t\t]\n\t\t);\n\n\t-- test_tbl_range\n\tINSERT INTO tst_range (a, b) VALUES\n\t\t(1, '[1, 10]'),\n\t\t(2, '[2, 20]'),\n\t\t(3, '[3, 30]'),\n\t\t(4, '[4, 40]'),\n\t\t(5, '[5, 50]');\n\n\t-- test_tbl_range_array\n\tINSERT INTO tst_range_array (a, b, c) VALUES\n\t\t(1, tstzrange('Mon Aug 04 00:00:00 2014 CEST'::timestamptz, 'infinity'), '{\"[1,2]\", \"[10,20]\"}'),\n\t\t(2, tstzrange('Sat Aug 02 00:00:00 2014 CEST'::timestamptz, 'Mon Aug 04 00:00:00 2014 CEST'::timestamptz), '{\"[2,3]\", \"[20,30]\"}'),\n\t\t(3, tstzrange('Fri Aug 01 00:00:00 2014 CEST'::timestamptz, 'Mon Aug 04 00:00:00 2014 CEST'::timestamptz), '{\"[3,4]\"}'),\n\t\t(4, tstzrange('Thu Jul 31 00:00:00 2014 CEST'::timestamptz, 'Mon Aug 04 00:00:00 2014 CEST'::timestamptz), '{\"[4,5]\", NULL, \"[40,50]\"}'),\n\t\t(5, NULL, NULL);\n\n\t-- tst_hstore\n\tINSERT INTO tst_hstore (a, b) VALUES\n\t\t(1, '\"a\"=>\"1\"'),\n\t\t(2, '\"zzz\"=>\"foo\"'),\n\t\t(3, '\"123\"=>\"321\"'),\n\t\t(4, '\"yellow horse\"=>\"moaned\"');\n\n\t-- tst_dom_constr\n\tINSERT INTO tst_dom_constr VALUES (10);\n" +_SELECT = "\n\tSET timezone = '+2';\n\tSELECT a, b FROM tst_one_array ORDER BY a;\n\tSELECT a, b, c, d FROM tst_arrays ORDER BY a;\n\tSELECT a, b FROM tst_one_enum ORDER BY a;\n\tSELECT a, b FROM tst_enums ORDER BY a;\n\tSELECT a, b FROM tst_one_comp ORDER BY a;\n\tSELECT a, b FROM tst_comps ORDER BY a;\n\tSELECT a, b FROM tst_comp_enum ORDER BY a;\n\tSELECT a, b FROM tst_comp_enum_array ORDER BY a;\n\tSELECT a, b FROM tst_comp_one_enum_array ORDER BY a;\n\tSELECT a, b FROM tst_comp_enum_what ORDER BY a;\n\tSELECT a, b FROM tst_comp_mix_array ORDER BY a;\n\tSELECT a, b FROM tst_range ORDER BY a;\n\tSELECT a, b, c FROM 
tst_range_array ORDER BY a;\n\tSELECT a, b FROM tst_hstore ORDER BY a;\n" +_UPDATE = "\n\tUPDATE tst_one_array SET b = '{4, 5, 6}' WHERE a = 1;\n\tUPDATE tst_one_array SET b = '{4, 5, 6, 1}' WHERE a > 3;\n\tUPDATE tst_arrays SET b = '{\"1a\", \"2b\", \"3c\"}', c = '{1.0, 2.0, 3.0}', d = '{\"1 day 1 second\", \"2 days 2 seconds\", \"3 days 3 second\"}' WHERE a = '{1, 2, 3}';\n\tUPDATE tst_arrays SET b = '{\"c\", \"d\", \"e\"}', c = '{3.0, 4.0, 5.0}', d = '{\"3 day 1 second\", \"4 days 2 seconds\", \"5 days 3 second\"}' WHERE a[1] > 3;\n\tUPDATE tst_one_enum SET b = 'c' WHERE a = 1;\n\tUPDATE tst_one_enum SET b = NULL WHERE a > 3;\n\tUPDATE tst_enums SET b = '{e, NULL}' WHERE a = 'a';\n\tUPDATE tst_enums SET b = '{e, d}' WHERE a > 'c';\n\tUPDATE tst_one_comp SET b = ROW(1.0, 'A', 1) WHERE a = 1;\n\tUPDATE tst_one_comp SET b = ROW(NULL, 'x', -1) WHERE a > 3;\n\tUPDATE tst_comps SET b = ARRAY[ROW(9, 'x', -1)::tst_comp_basic_t] WHERE (a).a = 1.0;\n\tUPDATE tst_comps SET b = ARRAY[NULL, ROW(9, 'x', NULL)::tst_comp_basic_t] WHERE (a).a > 3.9;\n\tUPDATE tst_comp_enum SET b = ROW(1.0, NULL, NULL) WHERE a = 1;\n\tUPDATE tst_comp_enum SET b = ROW(4.0, 'd', 44) WHERE a > 3;\n\tUPDATE tst_comp_enum_array SET b = ARRAY[NULL, ROW(3, 'd', 3)::tst_comp_enum_t] WHERE a = ROW(1.0, 'a', 1)::tst_comp_enum_t;\n\tUPDATE tst_comp_enum_array SET b = ARRAY[ROW(1, 'a', 1)::tst_comp_enum_t, ROW(2, 'b', 2)::tst_comp_enum_t] WHERE (a).a > 3;\n\tUPDATE tst_comp_one_enum_array SET b = ROW(1.0, '{a, e, c}', NULL) WHERE a = 1;\n\tUPDATE tst_comp_one_enum_array SET b = ROW(4.0, '{c, b, d}', 4) WHERE a > 3;\n\tUPDATE tst_comp_enum_what SET b = ARRAY[NULL, ROW(1, '{a, b, c}', 1)::tst_comp_enum_array_t, ROW(NULL, '{a, e, c}', 2)::tst_comp_enum_array_t] WHERE (a).a = 1;\n\tUPDATE tst_comp_enum_what SET b = ARRAY[ROW(5, '{a, b, c}', 5)::tst_comp_enum_array_t] WHERE (a).a > 3;\n\tUPDATE tst_comp_mix_array SET b[2] = NULL WHERE ((a).a).a = 1;\n\tUPDATE tst_range SET b = '[100, 1000]' WHERE a = 1;\n\tUPDATE 
tst_range SET b = '(1, 90)' WHERE a > 3;\n\tUPDATE tst_range_array SET c = '{\"[100, 1000]\"}' WHERE a = 1;\n\tUPDATE tst_range_array SET b = tstzrange('Mon Aug 04 00:00:00 2014 CEST'::timestamptz, 'infinity'), c = '{NULL, \"[11,9999999]\"}' WHERE a > 3;\n\tUPDATE tst_hstore SET b = '\"updated\"=>\"value\"' WHERE a < 3;\n\tUPDATE tst_hstore SET b = '\"also\"=>\"updated\"' WHERE a = 3;\n" +_DELETE = "\n\tDELETE FROM tst_one_array WHERE a = 1;\n\tDELETE FROM tst_one_array WHERE b = '{2, 3, 1}';\n\tDELETE FROM tst_arrays WHERE a = '{1, 2, 3}';\n\tDELETE FROM tst_arrays WHERE a[1] = 2;\n\tDELETE FROM tst_one_enum WHERE a = 1;\n\tDELETE FROM tst_one_enum WHERE b = 'b';\n\tDELETE FROM tst_enums WHERE a = 'a';\n\tDELETE FROM tst_enums WHERE b[1] = 'b';\n\tDELETE FROM tst_one_comp WHERE a = 1;\n\tDELETE FROM tst_one_comp WHERE (b).a = 2.0;\n\tDELETE FROM tst_comps WHERE (a).b = 'a';\n\tDELETE FROM tst_comps WHERE ROW(3, 'c', 3)::tst_comp_basic_t = ANY(b);\n\tDELETE FROM tst_comp_enum WHERE a = 1;\n\tDELETE FROM tst_comp_enum WHERE (b).a = 2.0;\n\tDELETE FROM tst_comp_enum_array WHERE a = ROW(1.0, 'a', 1)::tst_comp_enum_t;\n\tDELETE FROM tst_comp_enum_array WHERE ROW(3, 'c', 3)::tst_comp_enum_t = ANY(b);\n\tDELETE FROM tst_comp_one_enum_array WHERE a = 1;\n\tDELETE FROM tst_comp_one_enum_array WHERE 'a' = ANY((b).b);\n\tDELETE FROM tst_comp_enum_what WHERE (a).a = 1;\n\tDELETE FROM tst_comp_enum_what WHERE (b[1]).b = '{c, a, b}';\n\tDELETE FROM tst_comp_mix_array WHERE ((a).a).a = 1;\n\tDELETE FROM tst_range WHERE a = 1;\n\tDELETE FROM tst_range WHERE '[10,20]' && b;\n\tDELETE FROM tst_range_array WHERE a = 1;\n\tDELETE FROM tst_range_array WHERE tstzrange('Mon Aug 04 00:00:00 2014 CEST'::timestamptz, 'Mon Aug 05 00:00:00 2014 CEST'::timestamptz) && b;\n\tDELETE FROM tst_hstore WHERE a = 1;\n" +_EXPECTED_INSERT = '1|{1,2,3}\n2|{2,3,1}\n3|{3,2,1}\n4|{4,3,2}\n5|{5,NULL,3}\n{1,2,3}|{a,b,c}|{1.1,2.2,3.3}|{"1 day","2 days","3 
days"}\n{2,3,1}|{b,c,a}|{2.2,3.3,1.1}|{00:02:00,00:03:00,00:01:00}\n{3,1,2}|{c,a,b}|{3.3,1.1,2.2}|{"3 years","1 year","2 years"}\n{4,1,2}|{d,a,b}|{4.4,1.1,2.2}|{"4 years","1 year","2 years"}\n{5,NULL,NULL}|{e,NULL,b}|{5.5,1.1,NULL}|{"5 years",NULL,NULL}\n1|a\n2|b\n3|c\n4|d\n5|\na|{b,c}\nb|{c,a}\nc|{b,a}\nd|{c,b}\ne|{d,NULL}\n1|(1,a,1)\n2|(2,b,2)\n3|(3,c,3)\n4|(4,d,4)\n5|(,,5)\n(1,a,1)|{"(1,a,1)"}\n(2,b,2)|{"(2,b,2)"}\n(3,c,3)|{"(3,c,3)"}\n(4,d,4)|{"(4,d,3)"}\n(5,e,)|{NULL,"(5,,5)"}\n1|(1,a,1)\n2|(2,b,2)\n3|(3,c,3)\n4|(4,d,4)\n5|(,e,)\n(1,a,1)|{"(1,a,1)"}\n(2,b,2)|{"(2,b,2)"}\n(3,c,3)|{"(3,c,3)"}\n(4,d,3)|{"(3,d,3)"}\n(5,e,3)|{"(3,e,3)",NULL}\n1|(1,"{a,b,c}",1)\n2|(2,"{a,b,c}",2)\n3|(3,"{a,b,c}",3)\n4|(4,"{c,b,d}",4)\n5|(5,"{NULL,e,NULL}",5)\n(1,"{a,b,c}",1)|{"(1,\\"{a,b,c}\\",1)"}\n(2,"{b,c,a}",2)|{"(2,\\"{b,c,a}\\",1)"}\n(3,"{c,a,b}",1)|{"(3,\\"{c,a,b}\\",1)"}\n(4,"{c,b,d}",4)|{"(4,\\"{c,b,d}\\",4)"}\n(5,"{c,NULL,b}",)|{"(5,\\"{c,e,b}\\",1)"}\n("(1,a,1)","{""(1,a,1)"",""(2,b,2)""}",a,"{a,b,NULL,c}")|{"(\\"(1,a,1)\\",\\"{\\"\\"(1,a,1)\\"\\",\\"\\"(2,b,2)\\"\\",NULL}\\",a,\\"{a,b,c}\\")"}\n1|[1,11)\n2|[2,21)\n3|[3,31)\n4|[4,41)\n5|[5,51)\n1|["2014-08-04 00:00:00+02",infinity)|{"[1,3)","[10,21)"}\n2|["2014-08-02 00:00:00+02","2014-08-04 00:00:00+02")|{"[2,4)","[20,31)"}\n3|["2014-08-01 00:00:00+02","2014-08-04 00:00:00+02")|{"[3,5)"}\n4|["2014-07-31 00:00:00+02","2014-08-04 00:00:00+02")|{"[4,6)",NULL,"[40,51)"}\n5||\n1|"a"=>"1"\n2|"zzz"=>"foo"\n3|"123"=>"321"\n4|"yellow horse"=>"moaned"' +_EXPECTED_UPDATE = '1|{4,5,6}\n2|{2,3,1}\n3|{3,2,1}\n4|{4,5,6,1}\n5|{4,5,6,1}\n{1,2,3}|{1a,2b,3c}|{1,2,3}|{"1 day 00:00:01","2 days 00:00:02","3 days 00:00:03"}\n{2,3,1}|{b,c,a}|{2.2,3.3,1.1}|{00:02:00,00:03:00,00:01:00}\n{3,1,2}|{c,a,b}|{3.3,1.1,2.2}|{"3 years","1 year","2 years"}\n{4,1,2}|{c,d,e}|{3,4,5}|{"3 days 00:00:01","4 days 00:00:02","5 days 00:00:03"}\n{5,NULL,NULL}|{c,d,e}|{3,4,5}|{"3 days 00:00:01","4 days 00:00:02","5 days 
00:00:03"}\n1|c\n2|b\n3|c\n4|\n5|\na|{e,NULL}\nb|{c,a}\nc|{b,a}\nd|{e,d}\ne|{e,d}\n1|(1,A,1)\n2|(2,b,2)\n3|(3,c,3)\n4|(,x,-1)\n5|(,x,-1)\n(1,a,1)|{"(9,x,-1)"}\n(2,b,2)|{"(2,b,2)"}\n(3,c,3)|{"(3,c,3)"}\n(4,d,4)|{NULL,"(9,x,)"}\n(5,e,)|{NULL,"(9,x,)"}\n1|(1,,)\n2|(2,b,2)\n3|(3,c,3)\n4|(4,d,44)\n5|(4,d,44)\n(1,a,1)|{NULL,"(3,d,3)"}\n(2,b,2)|{"(2,b,2)"}\n(3,c,3)|{"(3,c,3)"}\n(4,d,3)|{"(1,a,1)","(2,b,2)"}\n(5,e,3)|{"(1,a,1)","(2,b,2)"}\n1|(1,"{a,e,c}",)\n2|(2,"{a,b,c}",2)\n3|(3,"{a,b,c}",3)\n4|(4,"{c,b,d}",4)\n5|(4,"{c,b,d}",4)\n(1,"{a,b,c}",1)|{NULL,"(1,\\"{a,b,c}\\",1)","(,\\"{a,e,c}\\",2)"}\n(2,"{b,c,a}",2)|{"(2,\\"{b,c,a}\\",1)"}\n(3,"{c,a,b}",1)|{"(3,\\"{c,a,b}\\",1)"}\n(4,"{c,b,d}",4)|{"(5,\\"{a,b,c}\\",5)"}\n(5,"{c,NULL,b}",)|{"(5,\\"{a,b,c}\\",5)"}\n("(1,a,1)","{""(1,a,1)"",""(2,b,2)""}",a,"{a,b,NULL,c}")|{"(\\"(1,a,1)\\",\\"{\\"\\"(1,a,1)\\"\\",\\"\\"(2,b,2)\\"\\",NULL}\\",a,\\"{a,b,c}\\")",NULL}\n1|[100,1001)\n2|[2,21)\n3|[3,31)\n4|[2,90)\n5|[2,90)\n1|["2014-08-04 00:00:00+02",infinity)|{"[100,1001)"}\n2|["2014-08-02 00:00:00+02","2014-08-04 00:00:00+02")|{"[2,4)","[20,31)"}\n3|["2014-08-01 00:00:00+02","2014-08-04 00:00:00+02")|{"[3,5)"}\n4|["2014-08-04 00:00:00+02",infinity)|{NULL,"[11,10000000)"}\n5|["2014-08-04 00:00:00+02",infinity)|{NULL,"[11,10000000)"}\n1|"updated"=>"value"\n2|"updated"=>"value"\n3|"also"=>"updated"\n4|"yellow horse"=>"moaned"' +_EXPECTED_DELETE = '3|{3,2,1}\n4|{4,5,6,1}\n5|{4,5,6,1}\n{3,1,2}|{c,a,b}|{3.3,1.1,2.2}|{"3 years","1 year","2 years"}\n{4,1,2}|{c,d,e}|{3,4,5}|{"3 days 00:00:01","4 days 00:00:02","5 days 00:00:03"}\n{5,NULL,NULL}|{c,d,e}|{3,4,5}|{"3 days 00:00:01","4 days 00:00:02","5 days 
00:00:03"}\n3|c\n4|\n5|\nb|{c,a}\nd|{e,d}\ne|{e,d}\n3|(3,c,3)\n4|(,x,-1)\n5|(,x,-1)\n(2,b,2)|{"(2,b,2)"}\n(4,d,4)|{NULL,"(9,x,)"}\n(5,e,)|{NULL,"(9,x,)"}\n3|(3,c,3)\n4|(4,d,44)\n5|(4,d,44)\n(2,b,2)|{"(2,b,2)"}\n(4,d,3)|{"(1,a,1)","(2,b,2)"}\n(5,e,3)|{"(1,a,1)","(2,b,2)"}\n4|(4,"{c,b,d}",4)\n5|(4,"{c,b,d}",4)\n(2,"{b,c,a}",2)|{"(2,\\"{b,c,a}\\",1)"}\n(4,"{c,b,d}",4)|{"(5,\\"{a,b,c}\\",5)"}\n(5,"{c,NULL,b}",)|{"(5,\\"{a,b,c}\\",5)"}\n2|["2014-08-02 00:00:00+02","2014-08-04 00:00:00+02")|{"[2,4)","[20,31)"}\n3|["2014-08-01 00:00:00+02","2014-08-04 00:00:00+02")|{"[3,5)"}\n2|"updated"=>"value"\n3|"also"=>"updated"\n4|"yellow horse"=>"moaned"' + + +def test_types(create_pg): + """Complex datatypes round-trip through logical replication (insert/update/delete).""" + publisher = create_pg("publisher", allows_streaming="logical") + subscriber = create_pg("subscriber") + + publisher.safe_psql(_DDL) + subscriber.safe_psql(_DDL) + + connstr = publisher.connstr() + " dbname=postgres" + publisher.safe_psql("CREATE PUBLICATION tap_pub FOR ALL TABLES") + subscriber.safe_psql( + "CREATE SUBSCRIPTION tap_sub CONNECTION '{}' PUBLICATION tap_pub " + "WITH (slot_name = tap_sub_slot)".format(connstr) + ) + subscriber.wait_for_subscription_sync(publisher, "tap_sub") + + publisher.safe_psql(_INSERT) + publisher.wait_for_catchup("tap_sub") + assert ( + subscriber.safe_psql(_SELECT) == _EXPECTED_INSERT + ), "check replicated inserts on subscriber" + + publisher.safe_psql(_UPDATE) + publisher.wait_for_catchup("tap_sub") + assert ( + subscriber.safe_psql(_SELECT) == _EXPECTED_UPDATE + ), "check replicated updates on subscriber" + + publisher.safe_psql(_DELETE) + publisher.wait_for_catchup("tap_sub") + assert ( + subscriber.safe_psql(_SELECT) == _EXPECTED_DELETE + ), "check replicated deletes on subscriber" + + # A domain with a SQL-language constraint needs an active snapshot to apply. 
def test_constraints(create_pg):
    """Check that replicated rows bypass FK checks but run REPLICA triggers.

    A publisher/subscriber pair is obtained from the ``create_pg`` fixture.
    Rows are replicated into ``tab_fk_ref`` even after the referenced table is
    dropped on the publisher, then the module-level ``_REPLICA_TRIGGER`` SQL is
    installed on the subscriber and further replicated inserts/updates are
    expected to leave the subscriber's summary unchanged.
    """
    pub_node = create_pg("publisher", allows_streaming="logical")
    sub_node = create_pg("subscriber")

    fk_table_ddl = "CREATE TABLE tab_fk (bid int PRIMARY KEY);"
    bid_summary = "SELECT count(*), min(bid), max(bid) FROM tab_fk_ref;"

    def catch_up():
        # Block until the subscription named tap_sub has caught up.
        pub_node.wait_for_catchup("tap_sub")

    # Publisher schema.
    pub_node.safe_psql(fk_table_ddl)
    pub_node.safe_psql(
        "CREATE TABLE tab_fk_ref "
        "(id int PRIMARY KEY, junk text, bid int REFERENCES tab_fk (bid));"
    )

    # Subscriber schema: same columns, deliberately declared in another order.
    sub_node.safe_psql(fk_table_ddl)
    sub_node.safe_psql(
        "CREATE TABLE tab_fk_ref "
        "(id int PRIMARY KEY, bid int REFERENCES tab_fk (bid), junk text);"
    )

    conninfo = pub_node.connstr() + " dbname=postgres"
    pub_node.safe_psql("CREATE PUBLICATION tap_pub FOR ALL TABLES;")
    create_subscription = (
        "CREATE SUBSCRIPTION tap_sub CONNECTION '{}' PUBLICATION tap_pub "
        "WITH (copy_data = false)"
    )
    sub_node.safe_psql(create_subscription.format(conninfo))
    catch_up()

    pub_node.safe_psql("INSERT INTO tab_fk (bid) VALUES (1);")
    # The junk value is big enough to be stored out of line.
    pub_node.safe_psql(
        "INSERT INTO tab_fk_ref (id, bid, junk) "
        "VALUES (1, 1, repeat(pi()::text,20000));"
    )
    catch_up()

    fk_rows = sub_node.safe_psql("SELECT count(*), min(bid), max(bid) FROM tab_fk;")
    assert fk_rows == "1|1|1", "check replicated tab_fk inserts on subscriber"
    ref_rows = sub_node.safe_psql(bid_summary)
    assert ref_rows == "1|1|1", "check replicated tab_fk_ref inserts on subscriber"

    # With the referenced table gone on the publisher, a row whose bid has no
    # parent is inserted; the subscriber is expected to accept it anyway.
    pub_node.safe_psql("DROP TABLE tab_fk CASCADE;")
    pub_node.safe_psql("INSERT INTO tab_fk_ref (id, bid) VALUES (2, 2);")
    catch_up()
    ref_rows = sub_node.safe_psql(bid_summary)
    assert ref_rows == "2|1|2", "check FK ignored on subscriber"

    sub_node.safe_psql(_REPLICA_TRIGGER)

    # Each step: change on the publisher, wait, then confirm the subscriber
    # summary is still "2|1|2" because the trigger discarded the change.
    filtered_steps = (
        (
            "INSERT INTO tab_fk_ref (id, bid) VALUES (10, 10);",
            bid_summary,
            "check replica insert trigger applied on subscriber",
        ),
        (
            "UPDATE tab_fk_ref SET bid = 2 WHERE bid = 1;",
            bid_summary,
            "check replica update column trigger applied on subscriber",
        ),
        (
            "UPDATE tab_fk_ref SET id = 6 WHERE id = 1;",
            "SELECT count(*), min(id), max(id) FROM tab_fk_ref;",
            "check column trigger applied even on update for other column",
        ),
    )
    for change_sql, probe_sql, label in filtered_steps:
        pub_node.safe_psql(change_sql)
        catch_up()
        assert sub_node.safe_psql(probe_sql) == "2|1|2", label

    sub_node.stop("fast")
    pub_node.stop("fast")
def test_sync(create_pg):
    """Exercise initial table sync across several subscription lifecycles.

    Covers: first sync, drop and recreate with conflicting rows, a second
    subscription created with ``copy_data = false``, a table added after the
    subscription exists (picked up by REFRESH PUBLICATION), and finally that
    DROP SUBSCRIPTION issued while the copy is failing leaves no replication
    slots on the publisher and no origin status rows on the subscriber.
    """
    pub_node = create_pg("publisher", allows_streaming="logical")
    sub_node = create_pg("subscriber", start=False)
    sub_node.append_conf("wal_retrieve_retry_interval = 1ms")
    sub_node.start()

    pub_node.safe_psql("CREATE TABLE tab_rep (a int primary key)")
    pub_node.safe_psql("INSERT INTO tab_rep SELECT generate_series(1,10)")
    sub_node.safe_psql("CREATE TABLE tab_rep (a int primary key)")

    conninfo = pub_node.connstr() + " dbname=postgres"
    pub_node.safe_psql("CREATE PUBLICATION tap_pub FOR ALL TABLES")

    subscription_sql = "CREATE SUBSCRIPTION {} CONNECTION '{}' PUBLICATION tap_pub{}"

    def subscribe(sub_name="tap_sub", with_clause=""):
        # Create a subscription to tap_pub; with_clause is appended verbatim.
        sub_node.safe_psql(subscription_sql.format(sub_name, conninfo, with_clause))

    def subscriber_rows(table):
        # Row count of *table* on the subscriber, as returned by _count().
        return _count(sub_node, table)

    def sync_has_started():
        # Poll the subscriber with the module-level _STARTED_QUERY.
        return sub_node.poll_query_until(_STARTED_QUERY)

    subscribe()
    sub_node.wait_for_subscription_sync(pub_node, "tap_sub")
    assert subscriber_rows("tab_rep") == "10", "initial data synced for first sub"

    # Dropping the subscription lets the publisher get ahead of the subscriber.
    sub_node.safe_psql("DROP SUBSCRIPTION tap_sub")
    pub_node.safe_psql("INSERT INTO tab_rep SELECT generate_series(11,20)")

    # On recreation the initial copy collides with the rows already present;
    # deleting them on the subscriber lets the sync finish.
    subscribe()
    assert sync_has_started(), "subscriber started sync"
    sub_node.safe_psql("DELETE FROM tab_rep;")
    sub_node.wait_for_subscription_sync()
    assert subscriber_rows("tab_rep") == "20", "initial data synced for second sub"

    # A second subscription between the same two nodes.
    subscribe("tap_sub2", " WITH (copy_data = false)")
    apply_worker_up = sub_node.poll_query_until(
        "SELECT pid IS NOT NULL FROM pg_stat_subscription "
        "WHERE subname = 'tap_sub2' AND worker_type = 'apply'"
    )
    assert apply_worker_up, "subscriber started"

    for sub_name in ("tap_sub", "tap_sub2"):
        sub_node.safe_psql("DROP SUBSCRIPTION {}".format(sub_name))
    remaining_subs = sub_node.safe_psql("SELECT count(*) FROM pg_subscription")
    assert remaining_subs == "0", "second and third sub are dropped"

    sub_node.safe_psql("DELETE FROM tab_rep;")
    subscribe()
    sub_node.wait_for_subscription_sync()
    assert subscriber_rows("tab_rep") == "20", "initial data synced for fourth sub"

    # A table created after the subscription exists is not picked up ...
    sub_node.safe_psql("CREATE TABLE tab_rep_next (a int)")
    pub_node.safe_psql("CREATE TABLE tab_rep_next (a) AS SELECT generate_series(1,10)")
    pub_node.wait_for_catchup("tap_sub")
    assert subscriber_rows("tab_rep_next") == "0", "no data for table added after"

    # ... until the subscription is refreshed.
    sub_node.safe_psql("ALTER SUBSCRIPTION tap_sub REFRESH PUBLICATION")
    sub_node.wait_for_subscription_sync()
    assert subscriber_rows("tab_rep_next") == "10", "added table now synced"

    pub_node.safe_psql("INSERT INTO tab_rep_next SELECT generate_series(1,10)")
    pub_node.wait_for_catchup("tap_sub")
    assert subscriber_rows("tab_rep_next") == "20", "added table changes replicated"

    # Remove the extra table and the subscription.
    pub_node.safe_psql("DROP TABLE tab_rep_next")
    sub_node.safe_psql("DROP TABLE tab_rep_next")
    sub_node.safe_psql("DROP SUBSCRIPTION tap_sub")

    # Recreate once more; tab_rep still holds the same rows on the subscriber,
    # so the initial copy runs into the unique constraint.
    subscribe()
    assert sync_has_started(), "subscriber started sync"

    # Dropping the subscription in that state must still remove the slots
    # it created on the publisher.
    sub_node.safe_psql("DROP SUBSCRIPTION tap_sub")
    slots_gone = pub_node.poll_query_until(
        "SELECT count(*) = 0 FROM pg_replication_slots"
    )
    assert slots_gone, "DROP SUBSCRIPTION during error cleans up publisher slots"

    origin_rows = sub_node.safe_psql(
        "SELECT count(*) FROM pg_replication_origin_status"
    )
    assert origin_rows == "0", "all replication origins have been cleaned up"

    sub_node.stop("fast")
    pub_node.stop("fast")
def test_rewrite(create_pg):
    """Check that replication keeps working across an ALTER TABLE ADD column.

    A two-column table is replicated, a ``NOT NULL DEFAULT 0`` column is then
    added on the subscriber and on the publisher (in that order), and a later
    insert must arrive on the subscriber with the new column populated.
    """
    pub_node = create_pg("publisher", allows_streaming="logical")
    sub_node = create_pg("subscriber")

    create_table = "CREATE TABLE test1 (a int, b text);"
    for node in (pub_node, sub_node):
        node.safe_psql(create_table)

    conninfo = pub_node.connstr() + " dbname=postgres"
    pub_node.safe_psql("CREATE PUBLICATION mypub FOR ALL TABLES;")
    create_subscription = "CREATE SUBSCRIPTION mysub CONNECTION '{}' PUBLICATION mypub;"
    sub_node.safe_psql(create_subscription.format(conninfo))

    sub_node.wait_for_subscription_sync(pub_node, "mysub")

    pub_node.safe_psql("INSERT INTO test1 (a, b) VALUES (1, 'one'), (2, 'two');")
    pub_node.wait_for_catchup("mysub")

    rows_before = sub_node.safe_psql("SELECT a, b FROM test1")
    assert rows_before == "1|one\n2|two", "initial data replicated to subscriber"

    # The DDL this test uses as its heap-rewriting statement; the subscriber
    # is altered first so it already has the column when changes arrive.
    add_column = "ALTER TABLE test1 ADD c int NOT NULL DEFAULT 0;"
    for node in (sub_node, pub_node):
        node.safe_psql(add_column)
    pub_node.wait_for_catchup("mysub")

    pub_node.safe_psql("INSERT INTO test1 (a, b, c) VALUES (3, 'three', 33);")
    pub_node.wait_for_catchup("mysub")

    rows_after = sub_node.safe_psql("SELECT a, b, c FROM test1")
    assert rows_after == "1|one|0\n2|two|0\n3|three|33", "data replicated to subscriber"

    sub_node.stop()
    pub_node.stop()
+ assert ( + subscriber.safe_psql("SELECT a FROM {} ORDER BY a".format(table_name)) == "1" + ), "check the tuple inserted after the RENAME was not replicated" + + # Restore the names (this helper may be called several times). + publisher.safe_psql( + "ALTER PUBLICATION {0} RENAME TO tap_pub_tmp;" + " ALTER PUBLICATION pub_empty RENAME TO {0};" + " ALTER PUBLICATION tap_pub_tmp RENAME TO pub_empty;".format(pubname) + ) + + +def test_ddl(create_pg): + """DDL behavior: same-txn disable/drop, missing-pub warnings, RENAME.""" + publisher = create_pg("publisher", allows_streaming="logical") + subscriber = create_pg("subscriber") + connstr = publisher.connstr() + " dbname=postgres" + + ddl = "CREATE TABLE test1 (a int, b text);" + publisher.safe_psql(ddl) + subscriber.safe_psql(ddl) + + publisher.safe_psql("CREATE PUBLICATION mypub FOR ALL TABLES;") + subscriber.safe_psql( + "CREATE SUBSCRIPTION mysub CONNECTION '{}' PUBLICATION mypub;".format(connstr) + ) + publisher.wait_for_catchup("mysub") + + # Disable and drop in one transaction must not hang. + subscriber.safe_psql( + "BEGIN;\n" + "ALTER SUBSCRIPTION mysub DISABLE;\n" + "ALTER SUBSCRIPTION mysub SET (slot_name = NONE);\n" + "DROP SUBSCRIPTION mysub;\n" + "COMMIT;" + ) + + # One of the specified publications exists -> warning, succeeds. 
+ result = subscriber.psql_capture( + "CREATE SUBSCRIPTION mysub1 CONNECTION '{}' " + "PUBLICATION mypub, non_existent_pub".format(connstr) + ) + assert re.search( + r'WARNING: publication "non_existent_pub" does not exist on the publisher', + result.stderr, + ), "Create subscription throws warning for non-existent publication" + subscriber.wait_for_subscription_sync(publisher, "mysub1") + + result = subscriber.psql_capture( + "ALTER SUBSCRIPTION mysub1 ADD PUBLICATION " + "non_existent_pub1, non_existent_pub2" + ) + assert re.search( + r'WARNING: publications "non_existent_pub1", "non_existent_pub2" ' + r"do not exist on the publisher", + result.stderr, + ), "Alter subscription add publication warns for non-existent publications" + + result = subscriber.psql_capture( + "ALTER SUBSCRIPTION mysub1 SET PUBLICATION non_existent_pub" + ) + assert re.search( + r'WARNING: publication "non_existent_pub" does not exist on the publisher', + result.stderr, + ), "Alter subscription set publication warns for non-existent publication" + + publisher.safe_psql( + "DROP PUBLICATION mypub;\nSELECT pg_drop_replication_slot('mysub');" + ) + subscriber.safe_psql("DROP SUBSCRIPTION mysub1") + + # ALTER PUBLICATION RENAME during replication. 
+ ddl = "CREATE TABLE test2 (a int, b text);" + publisher.safe_psql(ddl) + subscriber.safe_psql(ddl) + + publisher.safe_psql( + "CREATE PUBLICATION pub_empty;\n" + "CREATE PUBLICATION pub_for_tab FOR TABLE test1;\n" + "CREATE PUBLICATION pub_for_all_tables FOR ALL TABLES;" + ) + subscriber.safe_psql( + "CREATE SUBSCRIPTION tap_sub CONNECTION '{}' " + "PUBLICATION pub_for_tab".format(connstr) + ) + subscriber.wait_for_subscription_sync(publisher, "tap_sub") + + _test_swap(publisher, subscriber, "test1", "pub_for_tab", "tap_sub") + + subscriber.safe_psql( + "ALTER SUBSCRIPTION tap_sub SET PUBLICATION pub_for_all_tables;" + ) + subscriber.wait_for_subscription_sync(publisher, "tap_sub") + + _test_swap(publisher, subscriber, "test2", "pub_for_all_tables", "tap_sub") + + publisher.safe_psql( + "DROP PUBLICATION pub_empty, pub_for_tab, pub_for_all_tables;\n" + "DROP TABLE test1, test2;" + ) + subscriber.safe_psql("DROP SUBSCRIPTION tap_sub;\nDROP TABLE test1, test2;") + + subscriber.stop() + publisher.stop() diff --git a/src/test/subscription/pyt/test_008_diff_schema.py b/src/test/subscription/pyt/test_008_diff_schema.py new file mode 100644 index 0000000000000..de17a14b733fb --- /dev/null +++ b/src/test/subscription/pyt/test_008_diff_schema.py @@ -0,0 +1,96 @@ +# Copyright (c) 2021-2026, PostgreSQL Global Development Group + +"""Port of src/test/subscription/t/008_diff_schema.pl. + +Behavior with a different schema on the subscriber (extra columns with local +defaults, replica-identity column added after the fact, missing columns). 
def test_diff_schema(create_pg):
    """Replicate into subscriber tables whose schema differs from the publisher.

    Three situations are checked: extra subscriber columns with local defaults
    (including an identity column), a primary-key column added only on the
    subscriber, and a subscriber table lacking published columns, which must
    produce a "missing replicated columns" error in the subscriber log.
    """
    pub_node = create_pg("publisher", allows_streaming="logical")
    sub_node = create_pg("subscriber")

    def catch_up():
        pub_node.wait_for_catchup("tap_sub")

    pub_node.safe_psql("CREATE TABLE test_tab (a int primary key, b varchar)")
    pub_node.safe_psql("INSERT INTO test_tab VALUES (1, 'foo'), (2, 'bar')")

    # The subscriber copy has three columns the publisher does not have.
    sub_node.safe_psql(
        "CREATE TABLE test_tab (a int primary key, b text, "
        "c timestamptz DEFAULT now(), d bigint DEFAULT 999, "
        "e int GENERATED BY DEFAULT AS IDENTITY)"
    )

    conninfo = pub_node.connstr() + " dbname=postgres"
    pub_node.safe_psql("CREATE PUBLICATION tap_pub FOR ALL TABLES")
    create_subscription = "CREATE SUBSCRIPTION tap_sub CONNECTION '{}' PUBLICATION tap_pub"
    sub_node.safe_psql(create_subscription.format(conninfo))

    sub_node.wait_for_subscription_sync(pub_node, "tap_sub")

    copied = sub_node.safe_psql(
        "SELECT count(*), count(c), count(d = 999) FROM test_tab"
    )
    assert copied == "2|2|2", "check initial data was copied to subscriber"

    defaults_probe = "SELECT count(*), count(c), count(d = 999), count(e) FROM test_tab"

    pub_node.safe_psql("UPDATE test_tab SET b = encode(sha256(b::bytea), 'hex')")
    catch_up()
    after_update = sub_node.safe_psql(defaults_probe)
    assert (
        after_update == "2|2|2|2"
    ), "check extra columns contain local defaults after copy"

    # Change an extra column locally, then replicate another update.
    sub_node.safe_psql(
        "UPDATE test_tab SET c = 'epoch'::timestamptz + 987654321 * interval '1s'"
    )
    pub_node.safe_psql("UPDATE test_tab SET b = encode(sha256(a::text::bytea), 'hex')")
    catch_up()
    local_change = sub_node.safe_psql(
        "SELECT count(*), count(extract(epoch from c) = 987654321), "
        "count(d = 999) FROM test_tab"
    )
    assert local_change == "2|2|2", "check extra columns contain locally changed data"

    pub_node.safe_psql("INSERT INTO test_tab VALUES (3, 'baz')")
    catch_up()
    after_insert = sub_node.safe_psql(defaults_probe)
    assert (
        after_insert == "3|3|3|3"
    ), "check extra columns contain local defaults after apply"

    # A primary-key column that exists only on the subscriber side.
    for node in (pub_node, sub_node):
        node.safe_psql("CREATE TABLE test_tab2 (a int)")
    sub_node.safe_psql("ALTER SUBSCRIPTION tap_sub REFRESH PUBLICATION")
    sub_node.wait_for_subscription_sync()

    sub_node.safe_psql("ALTER TABLE test_tab2 ADD COLUMN b serial PRIMARY KEY")
    pub_node.safe_psql("INSERT INTO test_tab2 VALUES (1)")
    catch_up()
    tab2_rows = sub_node.safe_psql("SELECT count(*), min(a), max(a) FROM test_tab2")
    assert tab2_rows == "1|1|1", "check replicated inserts on subscriber"

    # A subscriber table lacking published columns must be reported as an error.
    pub_node.safe_psql("CREATE TABLE test_tab3 (a int, b int, c int)")
    sub_node.safe_psql("CREATE TABLE test_tab3 (a int)")
    # The log position is read before the refresh so the search starts ahead
    # of anything the refresh causes to be logged.
    log_start = sub_node.current_log_position()
    sub_node.safe_psql("ALTER SUBSCRIPTION tap_sub REFRESH PUBLICATION")
    missing_columns_error = (
        r"ERROR: ( [A-Z0-9]+:)? logical replication target relation "
        r'"public.test_tab3" is missing replicated columns: "b", "c"'
    )
    sub_node.wait_for_log(missing_columns_error, log_start)

    sub_node.stop()
    pub_node.stop()
def test_matviews(create_pg):
    """Check that a publisher-side materialized view does not stall replication.

    The test has no assertion of its own: it passes when both
    ``wait_for_catchup`` calls return, the second one after a materialized
    view has been created on the publisher only.
    """
    pub_node = create_pg("publisher", allows_streaming="logical")
    sub_node = create_pg("subscriber")

    pub_node.safe_psql("CREATE TABLE test1 (a int PRIMARY KEY, b text)")
    sub_node.safe_psql("CREATE TABLE test1 (a int PRIMARY KEY, b text);")

    conninfo = pub_node.connstr() + " dbname=postgres"
    pub_node.safe_psql("CREATE PUBLICATION mypub FOR ALL TABLES;")
    create_subscription = "CREATE SUBSCRIPTION mysub CONNECTION '{}' PUBLICATION mypub;"
    sub_node.safe_psql(create_subscription.format(conninfo))

    pub_node.safe_psql("INSERT INTO test1 (a, b) VALUES (1, 'one'), (2, 'two');")
    pub_node.wait_for_catchup("mysub")

    # Populate a materialized view from the replicated table; none of its
    # rows are supposed to reach the subscriber.
    pub_node.safe_psql("CREATE MATERIALIZED VIEW testmv1 AS SELECT * FROM test1;")
    pub_node.wait_for_catchup("mysub")

    # The subscriber has no matching relation, yet the catch-up above must
    # return rather than hang, because MV data is not replicated. (bug #15044)

    sub_node.stop()
    pub_node.stop()
publisher.safe_psql("TRUNCATE tab1") + publisher.wait_for_catchup("sub1") + assert _count(subscriber, "tab1") == "0||", "truncate replicated" + assert ( + subscriber.safe_psql("SELECT nextval('seq1')") == "1" + ), "sequence not restarted" + + publisher.safe_psql("TRUNCATE tab1 RESTART IDENTITY") + publisher.wait_for_catchup("sub1") + assert ( + subscriber.safe_psql("SELECT nextval('seq1')") == "101" + ), "truncate restarted identities" + + # Publication that does not replicate truncate. + subscriber.safe_psql("INSERT INTO tab2 VALUES (1), (2), (3)") + publisher.safe_psql("TRUNCATE tab2") + publisher.wait_for_catchup("sub2") + assert _count(subscriber, "tab2") == "3|1|3", "truncate not replicated" + + publisher.safe_psql("ALTER PUBLICATION pub2 SET (publish = 'insert, truncate')") + publisher.safe_psql("TRUNCATE tab2") + publisher.wait_for_catchup("sub2") + assert _count(subscriber, "tab2") == "0||", "truncate replicated after pub change" + + # Multiple tables connected by foreign keys. + subscriber.safe_psql("INSERT INTO tab3 VALUES (1), (2), (3)") + subscriber.safe_psql("INSERT INTO tab4 VALUES (11, 1), (111, 1), (22, 2)") + publisher.safe_psql("TRUNCATE tab3, tab4") + publisher.wait_for_catchup("sub3") + assert _count(subscriber, "tab3") == "0||", "truncate of multiple tables replicated" + assert _count(subscriber, "tab4", "x") == "0||", "truncate of multiple tables" + + # Truncate of multiple tables, some not published. + subscriber.safe_psql("DROP SUBSCRIPTION sub2") + publisher.safe_psql("DROP PUBLICATION pub2") + subscriber.safe_psql("INSERT INTO tab1 VALUES (1), (2), (3)") + subscriber.safe_psql("INSERT INTO tab2 VALUES (1), (2), (3)") + publisher.safe_psql("TRUNCATE tab1, tab2") + publisher.wait_for_catchup("sub1") + assert _count(subscriber, "tab1") == "0||", "truncate, some not published" + assert _count(subscriber, "tab2") == "3|1|3", "truncate, some not published" + + # Synchronous logical replication. 
def _test_sync_rep(publisher, subscriber):
    """Check insert and TRUNCATE replication with sub1 as synchronous standby.

    ``synchronous_standby_names`` is set to ``sub1`` on the publisher for the
    duration of the check and reset afterwards; the configuration is reloaded
    after each change.
    """

    def alter_system_and_reload(statement):
        # Apply an ALTER SYSTEM statement, then reload the configuration.
        publisher.safe_psql(statement)
        publisher.safe_psql("SELECT pg_reload_conf()")

    alter_system_and_reload("ALTER SYSTEM SET synchronous_standby_names TO 'sub1'")

    # Each step: statement on the publisher, wait for sub1, compare tab1's
    # summary on the subscriber.
    steps = (
        (
            "INSERT INTO tab1 VALUES (1), (2), (3)",
            "3|1|3",
            "check synchronous logical replication",
        ),
        (
            "TRUNCATE tab1",
            "0||",
            "truncate in synchronous logical rep",
        ),
    )
    for statement, expected, label in steps:
        publisher.safe_psql(statement)
        publisher.wait_for_catchup("sub1")
        assert _count(subscriber, "tab1") == expected, label

    alter_system_and_reload("ALTER SYSTEM RESET synchronous_standby_names")
def test_generated(create_pg):
    """Run every generated-column replication scenario on one node pair.

    The scenario helpers share the same publisher, subscriber and connection
    string and are executed in a fixed order.
    """
    pub_node = create_pg("publisher", allows_streaming="logical")
    sub_node = create_pg("subscriber")
    conninfo = pub_node.connstr() + " dbname=postgres"

    scenarios = (
        _test_basic,
        _test_gen_to_nogen,
        _test_column_lists,
        _test_into_generated,
    )
    for run_scenario in scenarios:
        run_scenario(pub_node, sub_node, conninfo)
def _test_gen_to_nogen(publisher, subscriber, connstr):
    """Replicate a stored generated column into a regular subscriber column.

    Two publications of the same table are used: one with
    ``publish_generated_columns = none`` (subscribed from the ``postgres``
    database) and one with ``publish_generated_columns = stored`` (subscribed
    from a separate ``test_pgc_true`` database on the same subscriber node).
    Both the initial copy and later inserts are checked for each.
    """
    stored_db = "test_pgc_true"
    select_rows = "SELECT a, b FROM tab_gen_to_nogen ORDER BY a"

    def rows_without_generated():
        # Subscriber rows in the default database (publication with 'none').
        return subscriber.safe_psql(select_rows)

    def rows_with_generated():
        # Subscriber rows in test_pgc_true (publication with 'stored').
        return subscriber.safe_psql(select_rows, dbname=stored_db)

    subscriber.safe_psql("CREATE DATABASE test_pgc_true")
    publisher.safe_psql(
        "CREATE TABLE tab_gen_to_nogen "
        "(a int, b int GENERATED ALWAYS AS (a * 2) STORED);"
        " INSERT INTO tab_gen_to_nogen (a) VALUES (1), (2), (3);"
        " CREATE PUBLICATION regress_pub1_gen_to_nogen FOR TABLE tab_gen_to_nogen"
        " WITH (publish_generated_columns = none);"
        " CREATE PUBLICATION regress_pub2_gen_to_nogen FOR TABLE tab_gen_to_nogen"
        " WITH (publish_generated_columns = stored);"
    )

    subscribe_none = (
        "CREATE TABLE tab_gen_to_nogen (a int, b int);"
        " CREATE SUBSCRIPTION regress_sub1_gen_to_nogen CONNECTION '{}'"
        " PUBLICATION regress_pub1_gen_to_nogen WITH (copy_data = true);"
    )
    subscriber.safe_psql(subscribe_none.format(connstr))

    subscribe_stored = (
        "CREATE TABLE tab_gen_to_nogen (a int, b int);"
        " CREATE SUBSCRIPTION regress_sub2_gen_to_nogen CONNECTION '{}'"
        " PUBLICATION regress_pub2_gen_to_nogen WITH (copy_data = true);"
    )
    subscriber.safe_psql(subscribe_stored.format(connstr), dbname=stored_db)

    subscriber.wait_for_subscription_sync(
        publisher, "regress_sub1_gen_to_nogen", "postgres"
    )
    subscriber.wait_for_subscription_sync(
        publisher, "regress_sub2_gen_to_nogen", stored_db
    )

    # Initial copy: b stays empty with 'none' and is filled with 'stored'.
    initial_none = rows_without_generated()
    assert initial_none == "1|\n2|\n3|", "initial sync, publish_generated_columns=none"
    initial_stored = rows_with_generated()
    assert (
        initial_stored == "1|2\n2|4\n3|6"
    ), "initial sync, publish_generated_columns=stored"

    # Incremental changes follow the same rule.
    publisher.safe_psql("INSERT INTO tab_gen_to_nogen VALUES (4), (5)")
    publisher.wait_for_catchup("regress_sub1_gen_to_nogen")
    incremental_none = rows_without_generated()
    assert (
        incremental_none == "1|\n2|\n3|\n4|\n5|"
    ), "incremental, publish_generated_columns=none"
    publisher.wait_for_catchup("regress_sub2_gen_to_nogen")
    incremental_stored = rows_with_generated()
    assert (
        incremental_stored == "1|2\n2|4\n3|6\n4|8\n5|10"
    ), "incremental, publish_generated_columns=stored"

    # Tear down subscriptions, publications and the extra database.
    subscriber.safe_psql("DROP SUBSCRIPTION regress_sub1_gen_to_nogen")
    subscriber.safe_psql("DROP SUBSCRIPTION regress_sub2_gen_to_nogen", dbname=stored_db)
    publisher.safe_psql(
        "DROP PUBLICATION regress_pub1_gen_to_nogen;"
        " DROP PUBLICATION regress_pub2_gen_to_nogen;"
    )
    subscriber.safe_psql("DROP table tab_gen_to_nogen", dbname=stored_db)
    subscriber.safe_psql("DROP DATABASE test_pgc_true")
+ publisher.safe_psql( + "CREATE TABLE tab2 (a int, gen1 int GENERATED ALWAYS AS (a * 2) STORED);" + " INSERT INTO tab2 (a) VALUES (1), (2);" + " CREATE PUBLICATION pub1 FOR table tab2(gen1)" + " WITH (publish_generated_columns=none);" + ) + subscriber.safe_psql( + "CREATE TABLE tab2 (a int, gen1 int);" + " CREATE SUBSCRIPTION sub1 CONNECTION '{}' PUBLICATION pub1" + " WITH (copy_data = true);".format(connstr) + ) + subscriber.wait_for_subscription_sync(publisher, "sub1") + assert ( + subscriber.safe_psql("SELECT * FROM tab2 ORDER BY gen1") == "|2\n|4" + ), "tab2 initial sync, publish_generated_columns=none" + publisher.safe_psql("INSERT INTO tab2 VALUES (3), (4)") + publisher.wait_for_catchup("sub1") + assert ( + subscriber.safe_psql("SELECT * FROM tab2 ORDER BY gen1") == "|2\n|4\n|6\n|8" + ), "tab2 incremental, publish_generated_columns=none" + subscriber.safe_psql("DROP SUBSCRIPTION sub1") + publisher.safe_psql("DROP PUBLICATION pub1") + + # Only column-list columns are published even with 'stored'. 
def _test_into_generated(publisher, subscriber, connstr):
    """Check that replicating into generated subscriber columns logs an error.

    The publisher publishes ``t1(c1, c2, c3)`` where ``c2`` is a regular
    column and ``c3`` a stored generated column; on the subscriber both ``c2``
    and ``c3`` are generated. The subscriber log must then contain the
    "incompatible generated columns" error naming both columns.

    Args:
        publisher: publisher node (provides ``safe_psql``).
        subscriber: subscriber node (provides ``safe_psql``,
            ``current_log_position`` and ``wait_for_log``).
        connstr: connection string placed in CREATE SUBSCRIPTION.
    """
    # Replicating into a generated subscriber column is an error.
    publisher.safe_psql(
        "CREATE TABLE t1(c1 int, c2 int, c3 int GENERATED ALWAYS AS (c1 * 2) STORED);"
        " CREATE PUBLICATION pub1 for table t1(c1, c2, c3);"
        " INSERT INTO t1 VALUES (1);"
    )

    # Read the log position BEFORE creating the subscription. The error is
    # produced by workers started as a consequence of CREATE SUBSCRIPTION, so
    # an offset taken afterwards could already lie past its first occurrence
    # and the wait below would then depend on a later retry. This mirrors the
    # ordering used by test_diff_schema for its REFRESH PUBLICATION check.
    offset = subscriber.current_log_position()
    subscriber.safe_psql(
        "CREATE TABLE t1(c1 int, c2 int GENERATED ALWAYS AS (c1 + 2) STORED,"
        " c3 int GENERATED ALWAYS AS (c1 + 2) STORED);"
        " CREATE SUBSCRIPTION sub1 CONNECTION '{}' PUBLICATION pub1;".format(connstr)
    )
    subscriber.wait_for_log(
        r'ERROR: ( [A-Z0-9]+:)? logical replication target relation "public.t1" '
        r'has incompatible generated columns: "c2", "c3"',
        offset,
    )

    subscriber.safe_psql("DROP SUBSCRIPTION sub1")
    publisher.safe_psql("DROP PUBLICATION pub1")
+ publisher.safe_psql("CREATE TABLE tab2 (a text, b text)") + publisher.safe_psql("ALTER TABLE tab2 REPLICA IDENTITY FULL") + publisher.safe_psql(r"INSERT INTO tab2 VALUES (U&'\00E4bc', 'foo')") + subscriber.safe_psql("CREATE TABLE tab2 (a text COLLATE ctest_nondet, b text)") + subscriber.safe_psql("ALTER TABLE tab2 REPLICA IDENTITY FULL") + subscriber.safe_psql(r"INSERT INTO tab2 VALUES (U&'\0061\0308bc', 'foo')") + + publisher.safe_psql("CREATE PUBLICATION pub1 FOR ALL TABLES") + subscriber.safe_psql( + "CREATE SUBSCRIPTION sub1 CONNECTION '{}' PUBLICATION pub1 " + "WITH (copy_data = false)".format(connstr) + ) + publisher.wait_for_catchup("sub1") + + # Replica identity index: the subscriber must find the row via the + # nondeterministic collation. + publisher.safe_psql("UPDATE tab1 SET b = 'bar' WHERE b = 'foo'") + publisher.wait_for_catchup("sub1") + assert ( + subscriber.safe_psql("SELECT b FROM tab1") == "bar" + ), "update with primary key with nondeterministic collation" + + # Replica identity full. + publisher.safe_psql("UPDATE tab2 SET b = 'bar' WHERE b = 'foo'") + publisher.wait_for_catchup("sub1") + assert ( + subscriber.safe_psql("SELECT b FROM tab2") == "bar" + ), "update with replica identity full with nondeterministic collation" diff --git a/src/test/subscription/pyt/test_013_partition.py b/src/test/subscription/pyt/test_013_partition.py new file mode 100644 index 0000000000000..af59e80c288a6 --- /dev/null +++ b/src/test/subscription/pyt/test_013_partition.py @@ -0,0 +1,517 @@ +# Copyright (c) 2022-2026, PostgreSQL Global Development Group + +# pylint: disable=line-too-long,too-many-statements +"""Port of src/test/subscription/t/013_partition.pl. + +Logical replication into partitioned tables: replication via leaf and +via root identity (publish_via_partition_root), schema/identity mismatches, +update/delete row routing across partitions, and conflict-detection log +messages (update_missing / delete_missing / update_origin_differs). 
+Generated from the Perl original via .agent/gen_golden.py.
+"""
+
+
+def test_013_partition(create_pg):
+    """Generated golden port of 013_partition.
+
+    Drives one publisher and two subscribers through a fixed sequence of
+    DDL/DML statements and, after each replication catch-up, asserts the rows
+    (and trigger-activity records) visible on the subscribers.
+
+    The statements are strictly order-dependent: every assertion relies on
+    the data left behind by the steps before it.
+
+    Phases, in order:
+
+    1. ``pub1`` / ``pub_all`` created without publish_via_partition_root:
+       insert, update, delete and truncate on ``tab1`` and its partitions.
+    2. Conflict messages in subscriber1's log after it deleted the target
+       rows locally (update_missing / delete_missing).
+    3. Publications with publish_via_partition_root = true
+       (``pub_viaroot``, ``pub_lower_level``, ``pub_all``) against
+       subscriber tables that are partitioned differently or not at all.
+    4. Conflict messages again, including update_origin_differs with
+       track_commit_timestamp switched on.
+    5. ``tab5``: plain table on the publisher, partitioned table on
+       subscriber2, with column changes on either side between updates.
+
+    Args:
+        create_pg: callable invoked as ``create_pg(name, **options)`` that
+            returns a server node object (presumably a pytest fixture
+            provided by the test framework -- confirm in its conftest).
+    """
+    # Create the three nodes. Each is created with start=False and then
+    # started explicitly.
+    node_publisher = create_pg("publisher", allows_streaming="logical", start=False)
+    node_publisher.start()
+    node_subscriber1 = create_pg("subscriber1", start=False)
+    node_subscriber1.start()
+    node_subscriber2 = create_pg("subscriber2", start=False)
+    node_subscriber2.start()
+    publisher_connstr = node_publisher.connstr() + " dbname=postgres"
+
+    # --- Phase 1 setup: publisher ---------------------------------------
+    # pub1 starts empty (tables are added below); pub_all covers all tables.
+    node_publisher.safe_psql("CREATE PUBLICATION pub1")
+    node_publisher.safe_psql("CREATE PUBLICATION pub_all FOR ALL TABLES")
+    # tab1 is LIST-partitioned on a. tab1_1 is created standalone with the
+    # columns in a different order (b, a) and attached afterwards; tab1_2 and
+    # the default partition are created directly as partitions.
+    node_publisher.safe_psql(
+        "CREATE TABLE tab1 (a int PRIMARY KEY, b text) PARTITION BY LIST (a)"
+    )
+    node_publisher.safe_psql("CREATE TABLE tab1_1 (b text, a int NOT NULL)")
+    node_publisher.safe_psql(
+        "ALTER TABLE tab1 ATTACH PARTITION tab1_1 FOR VALUES IN (1, 2, 3)"
+    )
+    node_publisher.safe_psql(
+        "CREATE TABLE tab1_2 PARTITION OF tab1 FOR VALUES IN (4, 5, 6)"
+    )
+    node_publisher.safe_psql("CREATE TABLE tab1_def PARTITION OF tab1 DEFAULT")
+    # pub1 lists both the root table and one of its partitions.
+    node_publisher.safe_psql("ALTER PUBLICATION pub1 ADD TABLE tab1, tab1_1")
+
+    # --- Phase 1 setup: subscriber1 -------------------------------------
+    # Same table names as the publisher, but with an extra column c, other
+    # column orders, and tab1_2 sub-partitioned into tab1_2_1 (5) and
+    # tab1_2_2 (4, 6).
+    node_subscriber1.safe_psql(
+        "CREATE TABLE tab1 (c text, a int PRIMARY KEY, b text) PARTITION BY LIST (a)"
+    )
+    node_subscriber1.safe_psql("CREATE INDEX tab1_c_brin_idx ON tab1 USING brin (c)")
+    node_subscriber1.safe_psql(
+        "CREATE TABLE tab1_1 (b text, c text DEFAULT 'sub1_tab1', a int NOT NULL)"
+    )
+    node_subscriber1.safe_psql(
+        "ALTER TABLE tab1 ATTACH PARTITION tab1_1 FOR VALUES IN (1, 2, 3)"
+    )
+    node_subscriber1.safe_psql(
+        "CREATE TABLE tab1_2 PARTITION OF tab1 (c DEFAULT 'sub1_tab1') FOR VALUES IN (4, 5, 6) PARTITION BY LIST (a)"
+    )
+    node_subscriber1.safe_psql("CREATE TABLE tab1_2_1 (c text, b text, a int NOT NULL)")
+    node_subscriber1.safe_psql(
+        "ALTER TABLE tab1_2 ATTACH PARTITION tab1_2_1 FOR VALUES IN (5)"
+    )
+    node_subscriber1.safe_psql(
+        "CREATE TABLE tab1_2_2 PARTITION OF tab1_2 FOR VALUES IN (4, 6)"
+    )
+    node_subscriber1.safe_psql(
+        "CREATE TABLE tab1_def PARTITION OF tab1 (c DEFAULT 'sub1_tab1') DEFAULT"
+    )
+    node_subscriber1.safe_psql(
+        "CREATE SUBSCRIPTION sub1 CONNECTION '"
+        + publisher_connstr
+        + "' PUBLICATION pub1"
+    )
+    # Row-level AFTER INSERT OR UPDATE triggers on tab1, tab1_2 and tab1_2_2
+    # record each firing in sub1_trigger_activity; every trigger is enabled
+    # with ENABLE REPLICA TRIGGER.
+    node_subscriber1.safe_psql(
+        "CREATE TABLE sub1_trigger_activity (tgtab text, tgop text,\n tgwhen text, tglevel text, olda int, newa int);\nCREATE FUNCTION sub1_trigger_activity_func() RETURNS TRIGGER AS $$\nBEGIN\n IF (TG_OP = 'INSERT') THEN\n INSERT INTO public.sub1_trigger_activity\n SELECT TG_RELNAME, TG_OP, TG_WHEN, TG_LEVEL, NULL, NEW.a;\n ELSIF (TG_OP = 'UPDATE') THEN\n INSERT INTO public.sub1_trigger_activity\n SELECT TG_RELNAME, TG_OP, TG_WHEN, TG_LEVEL, OLD.a, NEW.a;\n END IF;\n RETURN NULL;\nEND;\n$$ LANGUAGE plpgsql;\nCREATE TRIGGER sub1_tab1_log_op_trigger\n AFTER INSERT OR UPDATE ON tab1\n FOR EACH ROW EXECUTE PROCEDURE sub1_trigger_activity_func();\nALTER TABLE ONLY tab1 ENABLE REPLICA TRIGGER sub1_tab1_log_op_trigger;\nCREATE TRIGGER sub1_tab1_2_log_op_trigger\n AFTER INSERT OR UPDATE ON tab1_2\n FOR EACH ROW EXECUTE PROCEDURE sub1_trigger_activity_func();\nALTER TABLE ONLY tab1_2 ENABLE REPLICA TRIGGER sub1_tab1_2_log_op_trigger;\nCREATE TRIGGER sub1_tab1_2_2_log_op_trigger\n AFTER INSERT OR UPDATE ON tab1_2_2\n FOR EACH ROW EXECUTE PROCEDURE sub1_trigger_activity_func();\nALTER TABLE ONLY tab1_2_2 ENABLE REPLICA TRIGGER sub1_tab1_2_2_log_op_trigger;"
+    )
+
+    # --- Phase 1 setup: subscriber2 -------------------------------------
+    # Plain (non-partitioned) tables carrying the names of the publisher's
+    # root and partitions, each with its own default for column c, subscribed
+    # through pub_all.
+    node_subscriber2.safe_psql(
+        "CREATE TABLE tab1 (a int PRIMARY KEY, c text DEFAULT 'sub2_tab1', b text)"
+    )
+    node_subscriber2.safe_psql(
+        "CREATE TABLE tab1_1 (a int PRIMARY KEY, c text DEFAULT 'sub2_tab1_1', b text)"
+    )
+    node_subscriber2.safe_psql(
+        "CREATE TABLE tab1_2 (a int PRIMARY KEY, c text DEFAULT 'sub2_tab1_2', b text)"
+    )
+    node_subscriber2.safe_psql(
+        "CREATE TABLE tab1_def (a int PRIMARY KEY, b text, c text DEFAULT 'sub2_tab1_def')"
+    )
+    node_subscriber2.safe_psql(
+        "CREATE SUBSCRIPTION sub2 CONNECTION '"
+        + publisher_connstr
+        + "' PUBLICATION pub_all"
+    )
+    # Same trigger-activity logging as on subscriber1, here on tab1 and
+    # tab1_2.
+    node_subscriber2.safe_psql(
+        "CREATE TABLE sub2_trigger_activity (tgtab text,\n tgop text, tgwhen text, tglevel text, olda int, newa int);\nCREATE FUNCTION sub2_trigger_activity_func() RETURNS TRIGGER AS $$\nBEGIN\n IF (TG_OP = 'INSERT') THEN\n INSERT INTO public.sub2_trigger_activity\n SELECT TG_RELNAME, TG_OP, TG_WHEN, TG_LEVEL, NULL, NEW.a;\n ELSIF (TG_OP = 'UPDATE') THEN\n INSERT INTO public.sub2_trigger_activity\n SELECT TG_RELNAME, TG_OP, TG_WHEN, TG_LEVEL, OLD.a, NEW.a;\n END IF;\n RETURN NULL;\nEND;\n$$ LANGUAGE plpgsql;\nCREATE TRIGGER sub2_tab1_log_op_trigger\n AFTER INSERT OR UPDATE ON tab1\n FOR EACH ROW EXECUTE PROCEDURE sub2_trigger_activity_func();\nALTER TABLE ONLY tab1 ENABLE REPLICA TRIGGER sub2_tab1_log_op_trigger;\nCREATE TRIGGER sub2_tab1_2_log_op_trigger\n AFTER INSERT OR UPDATE ON tab1_2\n FOR EACH ROW EXECUTE PROCEDURE sub2_trigger_activity_func();\nALTER TABLE ONLY tab1_2 ENABLE REPLICA TRIGGER sub2_tab1_2_log_op_trigger;"
+    )
+    # Let both subscriptions finish their initial sync before any DML.
+    node_subscriber1.wait_for_subscription_sync()
+    node_subscriber2.wait_for_subscription_sync()
+
+    # --- Phase 1: INSERT --------------------------------------------------
+    # Insert through the root and directly into partitions; value 0 matches
+    # no listed partition and therefore goes to tab1_def.
+    node_publisher.safe_psql("INSERT INTO tab1 VALUES (1)")
+    node_publisher.safe_psql("INSERT INTO tab1_1 (a) VALUES (3)")
+    node_publisher.safe_psql("INSERT INTO tab1_2 VALUES (5)")
+    node_publisher.safe_psql("INSERT INTO tab1 VALUES (0)")
+    node_publisher.wait_for_catchup("sub1")
+    node_publisher.wait_for_catchup("sub2")
+    # subscriber1: all rows visible through the root; 5 must sit in
+    # tab1_2_1, not tab1_2_2.
+    result = node_subscriber1.safe_psql("SELECT c, a FROM tab1 ORDER BY 1, 2")
+    assert (
+        result == "sub1_tab1|0\nsub1_tab1|1\nsub1_tab1|3\nsub1_tab1|5"
+    ), "inserts into tab1 and its partitions replicated"
+    result = node_subscriber1.safe_psql("SELECT a FROM tab1_2_1 ORDER BY 1")
+    assert result == "5", "inserts into tab1_2 replicated into tab1_2_1 correctly"
+    result = node_subscriber1.safe_psql("SELECT a FROM tab1_2_2 ORDER BY 1")
+    assert result == "", "inserts into tab1_2 replicated into tab1_2_2 correctly"
+    # subscriber2: rows land in the table named after the publisher's leaf
+    # partition (the per-table default of c identifies the target).
+    result = node_subscriber2.safe_psql("SELECT c, a FROM tab1_1 ORDER BY 1, 2")
+    assert result == "sub2_tab1_1|1\nsub2_tab1_1|3", "inserts into tab1_1 replicated"
+    result = node_subscriber2.safe_psql("SELECT c, a FROM tab1_2 ORDER BY 1, 2")
+    assert result == "sub2_tab1_2|5", "inserts into tab1_2 replicated"
+    # Only the tab1_2 trigger is expected to have logged anything so far.
+    result = node_subscriber2.safe_psql(
+        "SELECT * FROM sub2_trigger_activity ORDER BY tgtab, tgop, tgwhen, olda, newa;"
+    )
+    assert (
+        result == "tab1_2|INSERT|AFTER|ROW||5"
+    ), "check replica insert after trigger applied on subscriber"
+    result = node_subscriber2.safe_psql("SELECT c, a FROM tab1_def ORDER BY 1, 2")
+    assert result == "sub2_tab1_def|0", "inserts into tab1_def replicated"
+
+    # --- Phase 1: UPDATE that stays inside one publisher partition --------
+    # 1 -> 2 stays in tab1_1; 5 -> 6 -> 4 -> 6 stays in tab1_2 on the
+    # publisher. On subscriber1 the first of those (5 -> 6) crosses from
+    # tab1_2_1 into tab1_2_2.
+    node_publisher.safe_psql("UPDATE tab1 SET a = 2 WHERE a = 1")
+    node_publisher.safe_psql("UPDATE tab1 SET a = 6 WHERE a = 5")
+    node_publisher.safe_psql("UPDATE tab1 SET a = 4 WHERE a = 6")
+    node_publisher.safe_psql("UPDATE tab1 SET a = 6 WHERE a = 4")
+    node_publisher.wait_for_catchup("sub1")
+    node_publisher.wait_for_catchup("sub2")
+    result = node_subscriber1.safe_psql("SELECT c, a FROM tab1 ORDER BY 1, 2")
+    assert (
+        result == "sub1_tab1|0\nsub1_tab1|2\nsub1_tab1|3\nsub1_tab1|6"
+    ), "update of tab1_1, tab1_2 replicated"
+    result = node_subscriber1.safe_psql("SELECT a FROM tab1_2_1 ORDER BY 1")
+    assert result == "", "updates of tab1_2 replicated into tab1_2_1 correctly"
+    result = node_subscriber1.safe_psql("SELECT a FROM tab1_2_2 ORDER BY 1")
+    assert result == "6", "updates of tab1_2 replicated into tab1_2_2 correctly"
+    # Expected trigger log on subscriber1: one INSERT into tab1_2_2 followed
+    # by two UPDATEs there.
+    result = node_subscriber1.safe_psql(
+        "SELECT * FROM sub1_trigger_activity ORDER BY tgtab, tgop, tgwhen, olda, newa;"
+    )
+    assert (
+        result
+        == "tab1_2_2|INSERT|AFTER|ROW||6\ntab1_2_2|UPDATE|AFTER|ROW|4|6\ntab1_2_2|UPDATE|AFTER|ROW|6|4"
+    ), "check replica update after trigger applied on subscriber"
+    result = node_subscriber2.safe_psql("SELECT c, a FROM tab1_1 ORDER BY 1, 2")
+    assert result == "sub2_tab1_1|2\nsub2_tab1_1|3", "update of tab1_1 replicated"
+    result = node_subscriber2.safe_psql("SELECT c, a FROM tab1_2 ORDER BY 1, 2")
+    assert result == "sub2_tab1_2|6", "tab1_2 updated"
+    # On subscriber2 tab1_2 is a plain table, so all three changes are
+    # expected to be logged as UPDATEs after the earlier INSERT.
+    result = node_subscriber2.safe_psql(
+        "SELECT * FROM sub2_trigger_activity ORDER BY tgtab, tgop, tgwhen, olda, newa;"
+    )
+    assert (
+        result
+        == "tab1_2|INSERT|AFTER|ROW||5\ntab1_2|UPDATE|AFTER|ROW|4|6\ntab1_2|UPDATE|AFTER|ROW|5|6\ntab1_2|UPDATE|AFTER|ROW|6|4"
+    ), "check replica update after trigger applied on subscriber"
+    result = node_subscriber2.safe_psql("SELECT c, a FROM tab1_def ORDER BY 1")
+    assert result == "sub2_tab1_def|0", "tab1_def unchanged"
+
+    # --- Phase 1: UPDATE that moves a row between publisher partitions ----
+    # 0 -> 1 moves the row from tab1_def to tab1_1, then 1 -> 4 moves it on
+    # to tab1_2.
+    node_publisher.safe_psql("UPDATE tab1 SET a = 1 WHERE a = 0")
+    node_publisher.safe_psql("UPDATE tab1 SET a = 4 WHERE a = 1")
+    node_publisher.wait_for_catchup("sub1")
+    node_publisher.wait_for_catchup("sub2")
+    result = node_subscriber1.safe_psql("SELECT c, a FROM tab1 ORDER BY 1, 2")
+    assert (
+        result == "sub1_tab1|2\nsub1_tab1|3\nsub1_tab1|4\nsub1_tab1|6"
+    ), "update of tab1 (delete from tab1_def + insert into tab1_1) replicated"
+    result = node_subscriber1.safe_psql("SELECT a FROM tab1_2_2 ORDER BY 1")
+    assert (
+        result == "4\n6"
+    ), "updates of tab1 (delete + insert) replicated into tab1_2_2 correctly"
+    result = node_subscriber2.safe_psql("SELECT c, a FROM tab1_1 ORDER BY 1, 2")
+    assert result == "sub2_tab1_1|2\nsub2_tab1_1|3", "tab1_1 unchanged"
+    result = node_subscriber2.safe_psql("SELECT c, a FROM tab1_2 ORDER BY 1, 2")
+    assert result == "sub2_tab1_2|4\nsub2_tab1_2|6", "insert into tab1_2 replicated"
+    result = node_subscriber2.safe_psql("SELECT a FROM tab1_def ORDER BY 1")
+    assert result == "", "delete from tab1_def replicated"
+
+    # --- Phase 1: DELETE --------------------------------------------------
+    node_publisher.safe_psql("DELETE FROM tab1 WHERE a IN (2, 3, 5)")
+    node_publisher.safe_psql("DELETE FROM tab1_2")
+    node_publisher.wait_for_catchup("sub1")
+    node_publisher.wait_for_catchup("sub2")
+    result = node_subscriber1.safe_psql("SELECT a FROM tab1")
+    assert result == "", "delete from tab1_1, tab1_2 replicated"
+    result = node_subscriber2.safe_psql("SELECT a FROM tab1_1")
+    assert result == "", "delete from tab1_1 replicated"
+    result = node_subscriber2.safe_psql("SELECT a FROM tab1_2")
+    assert result == "", "delete from tab1_2 replicated"
+
+    # --- Phase 1: TRUNCATE ------------------------------------------------
+    # Seed rows locally on the subscribers, then truncate on the publisher:
+    # first only the partition tab1_2, then the whole of tab1.
+    node_subscriber1.safe_psql("INSERT INTO tab1 (a) VALUES (1), (2), (5)")
+    node_subscriber2.safe_psql("INSERT INTO tab1_2 (a) VALUES (2)")
+    node_publisher.safe_psql("TRUNCATE tab1_2")
+    node_publisher.wait_for_catchup("sub1")
+    node_publisher.wait_for_catchup("sub2")
+    # On subscriber1 only the row stored under tab1_2 (a = 5) must be gone.
+    result = node_subscriber1.safe_psql("SELECT a FROM tab1 ORDER BY 1")
+    assert result == "1\n2", "truncate of tab1_2 replicated"
+    result = node_subscriber2.safe_psql("SELECT a FROM tab1_2 ORDER BY 1")
+    assert result == "", "truncate of tab1_2 replicated"
+    node_publisher.safe_psql("TRUNCATE tab1")
+    node_publisher.wait_for_catchup("sub1")
+    node_publisher.wait_for_catchup("sub2")
+    result = node_subscriber1.safe_psql("SELECT a FROM tab1 ORDER BY 1")
+    assert result == "", "truncate of tab1_1 replicated"
+    result = node_subscriber2.safe_psql("SELECT a FROM tab1 ORDER BY 1")
+    assert result == "", "truncate of tab1 replicated"
+
+    # --- Phase 2: missing-row conflicts on subscriber1 -----------------
+    # Replicate three rows, delete them locally on subscriber1, remember the
+    # current end of its log, then update/delete the rows on the publisher.
+    node_publisher.safe_psql(
+        "INSERT INTO tab1 VALUES (1, 'foo'), (4, 'bar'), (10, 'baz')"
+    )
+    node_publisher.wait_for_catchup("sub1")
+    node_publisher.wait_for_catchup("sub2")
+    node_subscriber1.safe_psql("DELETE FROM tab1")
+    log_location = node_subscriber1.current_log_position()
+    node_publisher.safe_psql("UPDATE tab1 SET b = 'quux' WHERE a = 4")
+    node_publisher.safe_psql("DELETE FROM tab1")
+    node_publisher.wait_for_catchup("sub1")
+    node_publisher.wait_for_catchup("sub2")
+    # Each pattern is searched in subscriber1's log starting at log_location
+    # and names the subscriber-side leaf partition that should have held the
+    # row.
+    assert node_subscriber1.log_matches(
+        r"""conflict detected on relation "public.tab1_2_2": conflict=update_missing.*\n.*DETAIL:.* Could not find the row to be updated: remote row \(null, 4, quux\), replica identity \(a\)=\(4\)""",
+        log_location,
+    ), "update target row is missing in tab1_2_2"
+    assert node_subscriber1.log_matches(
+        r"""conflict detected on relation "public.tab1_1": conflict=delete_missing.*\n.*DETAIL:.* Could not find the row to be deleted: replica identity \(a\)=\(1\)""",
+        log_location,
+    ), "delete target row is missing in tab1_1"
+    assert node_subscriber1.log_matches(
+        r"""conflict detected on relation "public.tab1_2_2": conflict=delete_missing.*\n.*DETAIL:.* Could not find the row to be deleted: replica identity \(a\)=\(4\)""",
+        log_location,
+    ), "delete target row is missing in tab1_2_2"
+    assert node_subscriber1.log_matches(
+        r"""conflict detected on relation "public.tab1_def": conflict=delete_missing.*\n.*DETAIL:.* Could not find the row to be deleted: replica identity \(a\)=\(10\)""",
+        log_location,
+    ), "delete target row is missing in tab1_def"
+
+    # --- Phase 3 setup: publisher ---------------------------------------
+    # pub1 is dropped; new partitioned tables tab2, tab3 and the two-level
+    # tab4 -> tab4_1 -> tab4_1_1 are added.
+    node_publisher.safe_psql("DROP PUBLICATION pub1")
+    node_publisher.safe_psql(
+        "CREATE TABLE tab2 (a int PRIMARY KEY, b text) PARTITION BY LIST (a)"
+    )
+    node_publisher.safe_psql("CREATE TABLE tab2_1 (b text, a int NOT NULL)")
+    node_publisher.safe_psql(
+        "ALTER TABLE tab2 ATTACH PARTITION tab2_1 FOR VALUES IN (0, 1, 2, 3)"
+    )
+    node_publisher.safe_psql(
+        "CREATE TABLE tab2_2 PARTITION OF tab2 FOR VALUES IN (5, 6)"
+    )
+    node_publisher.safe_psql(
+        "CREATE TABLE tab3 (a int PRIMARY KEY, b text) PARTITION BY LIST (a)"
+    )
+    node_publisher.safe_psql(
+        "CREATE TABLE tab3_1 PARTITION OF tab3 FOR VALUES IN (0, 1, 2, 3, 5, 6)"
+    )
+    node_publisher.safe_psql(
+        "CREATE TABLE tab4 (a int PRIMARY KEY) PARTITION BY LIST (a)"
+    )
+    node_publisher.safe_psql(
+        "CREATE TABLE tab4_1 PARTITION OF tab4 FOR VALUES IN (-1, 0, 1) PARTITION BY LIST (a)"
+    )
+    node_publisher.safe_psql(
+        "CREATE TABLE tab4_1_1 PARTITION OF tab4_1 FOR VALUES IN (-1, 0, 1)"
+    )
+    # From here on every publication used has publish_via_partition_root
+    # switched on. pub_viaroot lists a root (tab2), one of its partitions
+    # (tab2_1) and a partition of another root (tab3_1); pub_lower_level
+    # lists only the intermediate level tab4_1.
+    node_publisher.safe_psql(
+        "ALTER PUBLICATION pub_all SET (publish_via_partition_root = true)"
+    )
+    node_publisher.safe_psql(
+        "CREATE PUBLICATION pub_viaroot FOR TABLE tab2, tab2_1, tab3_1 WITH (publish_via_partition_root = true)"
+    )
+    node_publisher.safe_psql(
+        "CREATE PUBLICATION pub_lower_level FOR TABLE tab4_1 WITH (publish_via_partition_root = true)"
+    )
+    # Rows that must arrive through the initial table sync.
+    node_publisher.safe_psql("INSERT INTO tab2 VALUES (1)")
+    node_publisher.safe_psql("INSERT INTO tab4 VALUES (-1)")
+
+    # --- Phase 3 setup: subscriber1 -------------------------------------
+    # Replace sub1 by sub_viaroot. tab2 is RANGE-partitioned here (LIST on
+    # the publisher) and tab3_1 is a plain table.
+    node_subscriber1.safe_psql("DROP SUBSCRIPTION sub1")
+    node_subscriber1.safe_psql(
+        "CREATE TABLE tab2 (a int PRIMARY KEY, c text DEFAULT 'sub1_tab2', b text) PARTITION BY RANGE (a)"
+    )
+    node_subscriber1.safe_psql(
+        "CREATE TABLE tab2_1 (c text DEFAULT 'sub1_tab2', b text, a int NOT NULL)"
+    )
+    node_subscriber1.safe_psql(
+        "ALTER TABLE tab2 ATTACH PARTITION tab2_1 FOR VALUES FROM (0) TO (10)"
+    )
+    node_subscriber1.safe_psql(
+        "CREATE TABLE tab3_1 (c text DEFAULT 'sub1_tab3_1', b text, a int NOT NULL PRIMARY KEY)"
+    )
+    node_subscriber1.safe_psql(
+        "CREATE SUBSCRIPTION sub_viaroot CONNECTION '"
+        + publisher_connstr
+        + "' PUBLICATION pub_viaroot"
+    )
+
+    # --- Phase 3 setup: subscriber2 -------------------------------------
+    # tab1 is recreated as a HASH-partitioned table; tab2, tab3, tab3_1,
+    # tab4 and tab4_1 are plain tables.
+    node_subscriber2.safe_psql("DROP TABLE tab1")
+    node_subscriber2.safe_psql(
+        "CREATE TABLE tab1 (a int PRIMARY KEY, c text DEFAULT 'sub2_tab1', b text) PARTITION BY HASH (a)"
+    )
+    node_subscriber2.safe_psql(
+        "CREATE TABLE tab1_part1 (b text, c text, a int NOT NULL)"
+    )
+    node_subscriber2.safe_psql(
+        "ALTER TABLE tab1 ATTACH PARTITION tab1_part1 FOR VALUES WITH (MODULUS 2, REMAINDER 0)"
+    )
+    node_subscriber2.safe_psql(
+        "CREATE TABLE tab1_part2 PARTITION OF tab1 FOR VALUES WITH (MODULUS 2, REMAINDER 1)"
+    )
+    node_subscriber2.safe_psql(
+        "CREATE TABLE tab2 (a int PRIMARY KEY, c text DEFAULT 'sub2_tab2', b text)"
+    )
+    node_subscriber2.safe_psql(
+        "CREATE TABLE tab3 (a int PRIMARY KEY, c text DEFAULT 'sub2_tab3', b text)"
+    )
+    node_subscriber2.safe_psql(
+        "CREATE TABLE tab3_1 (a int PRIMARY KEY, c text DEFAULT 'sub2_tab3_1', b text)"
+    )
+    node_subscriber2.safe_psql("CREATE TABLE tab4 (a int PRIMARY KEY)")
+    node_subscriber2.safe_psql("CREATE TABLE tab4_1 (a int PRIMARY KEY)")
+    # sub2 now subscribes to two publications, pub_lower_level listed first.
+    node_subscriber2.safe_psql(
+        "ALTER SUBSCRIPTION sub2 SET PUBLICATION pub_lower_level, pub_all"
+    )
+    node_subscriber1.wait_for_subscription_sync()
+    node_subscriber2.wait_for_subscription_sync()
+    # Initial sync: the tab4 row must arrive in tab4 and nothing in tab4_1.
+    result = node_subscriber1.safe_psql("SELECT c, a FROM tab2")
+    assert result == "sub1_tab2|1", "initial data synced for pub_viaroot"
+    result = node_subscriber2.safe_psql("SELECT a FROM tab4 ORDER BY 1")
+    assert result == "-1", "initial data synced for pub_lower_level and pub_all"
+    result = node_subscriber2.safe_psql("SELECT a FROM tab4_1 ORDER BY 1")
+    assert result == "", "initial data synced for pub_lower_level and pub_all"
+
+    # --- Phase 3: INSERT --------------------------------------------------
+    node_publisher.safe_psql("INSERT INTO tab1 VALUES (1), (0)")
+    node_publisher.safe_psql("INSERT INTO tab1_1 (a) VALUES (3)")
+    node_publisher.safe_psql("INSERT INTO tab1_2 VALUES (5)")
+    node_publisher.safe_psql("INSERT INTO tab2 VALUES (0), (3), (5)")
+    node_publisher.safe_psql("INSERT INTO tab3 VALUES (1), (0), (3), (5)")
+    node_publisher.safe_psql("INSERT INTO tab4 VALUES (0)")
+    node_publisher.wait_for_catchup("sub_viaroot")
+    node_publisher.wait_for_catchup("sub2")
+    result = node_subscriber1.safe_psql("SELECT c, a FROM tab2 ORDER BY 1, 2")
+    assert (
+        result == "sub1_tab2|0\nsub1_tab2|1\nsub1_tab2|3\nsub1_tab2|5"
+    ), "inserts into tab2 replicated"
+    result = node_subscriber1.safe_psql("SELECT c, a FROM tab3_1 ORDER BY 1, 2")
+    assert (
+        result == "sub1_tab3_1|0\nsub1_tab3_1|1\nsub1_tab3_1|3\nsub1_tab3_1|5"
+    ), "inserts into tab3_1 replicated"
+    # On subscriber2 the rows are expected in the tables named after the
+    # publisher's roots (tab1, tab2, tab3, tab4).
+    result = node_subscriber2.safe_psql("SELECT c, a FROM tab1 ORDER BY 1, 2")
+    assert (
+        result == "sub2_tab1|0\nsub2_tab1|1\nsub2_tab1|3\nsub2_tab1|5"
+    ), "inserts into tab1 replicated"
+    result = node_subscriber2.safe_psql("SELECT c, a FROM tab2 ORDER BY 1, 2")
+    assert (
+        result == "sub2_tab2|0\nsub2_tab2|1\nsub2_tab2|3\nsub2_tab2|5"
+    ), "inserts into tab2 replicated"
+    result = node_subscriber2.safe_psql("SELECT c, a FROM tab3 ORDER BY 1, 2")
+    assert (
+        result == "sub2_tab3|0\nsub2_tab3|1\nsub2_tab3|3\nsub2_tab3|5"
+    ), "inserts into tab3 replicated"
+    result = node_subscriber2.safe_psql("SELECT a FROM tab4 ORDER BY 1")
+    assert result == "-1\n0", "inserts into tab4 replicated"
+    result = node_subscriber2.safe_psql("SELECT a FROM tab4_1 ORDER BY 1")
+    assert result == "", "inserts into tab4_1 replicated"
+    # Same two publications in the opposite order: the outcome for tab4 and
+    # tab4_1 is expected to be unchanged.
+    node_subscriber2.safe_psql(
+        "ALTER SUBSCRIPTION sub2 SET PUBLICATION pub_all, pub_lower_level"
+    )
+    node_subscriber2.wait_for_subscription_sync()
+    node_publisher.safe_psql("INSERT INTO tab4 VALUES (1)")
+    node_publisher.wait_for_catchup("sub2")
+    result = node_subscriber2.safe_psql("SELECT a FROM tab4 ORDER BY 1")
+    assert result == "-1\n0\n1", "inserts into tab4 replicated"
+    result = node_subscriber2.safe_psql("SELECT a FROM tab4_1 ORDER BY 1")
+    assert result == "", "inserts into tab4_1 replicated"
+
+    # --- Phase 3: UPDATE (5 -> 6) ----------------------------------------
+    node_publisher.safe_psql("UPDATE tab1 SET a = 6 WHERE a = 5")
+    node_publisher.safe_psql("UPDATE tab2 SET a = 6 WHERE a = 5")
+    node_publisher.safe_psql("UPDATE tab3 SET a = 6 WHERE a = 5")
+    node_publisher.wait_for_catchup("sub_viaroot")
+    node_publisher.wait_for_catchup("sub2")
+    result = node_subscriber1.safe_psql("SELECT c, a FROM tab2 ORDER BY 1, 2")
+    assert (
+        result == "sub1_tab2|0\nsub1_tab2|1\nsub1_tab2|3\nsub1_tab2|6"
+    ), "update of tab2 replicated"
+    result = node_subscriber1.safe_psql("SELECT c, a FROM tab3_1 ORDER BY 1, 2")
+    assert (
+        result == "sub1_tab3_1|0\nsub1_tab3_1|1\nsub1_tab3_1|3\nsub1_tab3_1|6"
+    ), "update of tab3_1 replicated"
+    result = node_subscriber2.safe_psql("SELECT c, a FROM tab1 ORDER BY 1, 2")
+    assert (
+        result == "sub2_tab1|0\nsub2_tab1|1\nsub2_tab1|3\nsub2_tab1|6"
+    ), "inserts into tab1 replicated"
+    result = node_subscriber2.safe_psql("SELECT c, a FROM tab2 ORDER BY 1, 2")
+    assert (
+        result == "sub2_tab2|0\nsub2_tab2|1\nsub2_tab2|3\nsub2_tab2|6"
+    ), "inserts into tab2 replicated"
+    result = node_subscriber2.safe_psql("SELECT c, a FROM tab3 ORDER BY 1, 2")
+    assert (
+        result == "sub2_tab3|0\nsub2_tab3|1\nsub2_tab3|3\nsub2_tab3|6"
+    ), "inserts into tab3 replicated"
+
+    # --- Phase 3: UPDATE (6 -> 2) ----------------------------------------
+    # For tab1 and tab2 this moves the row into a different publisher
+    # partition (tab1_2 -> tab1_1, tab2_2 -> tab2_1).
+    node_publisher.safe_psql("UPDATE tab1 SET a = 2 WHERE a = 6")
+    node_publisher.safe_psql("UPDATE tab2 SET a = 2 WHERE a = 6")
+    node_publisher.safe_psql("UPDATE tab3 SET a = 2 WHERE a = 6")
+    node_publisher.wait_for_catchup("sub_viaroot")
+    node_publisher.wait_for_catchup("sub2")
+    result = node_subscriber1.safe_psql("SELECT c, a FROM tab2 ORDER BY 1, 2")
+    assert (
+        result == "sub1_tab2|0\nsub1_tab2|1\nsub1_tab2|2\nsub1_tab2|3"
+    ), "update of tab2 replicated"
+    result = node_subscriber1.safe_psql("SELECT c, a FROM tab3_1 ORDER BY 1, 2")
+    assert (
+        result == "sub1_tab3_1|0\nsub1_tab3_1|1\nsub1_tab3_1|2\nsub1_tab3_1|3"
+    ), "update of tab3_1 replicated"
+    result = node_subscriber2.safe_psql("SELECT c, a FROM tab1 ORDER BY 1, 2")
+    assert (
+        result == "sub2_tab1|0\nsub2_tab1|1\nsub2_tab1|2\nsub2_tab1|3"
+    ), "update of tab1 replicated"
+    result = node_subscriber2.safe_psql("SELECT c, a FROM tab2 ORDER BY 1, 2")
+    assert (
+        result == "sub2_tab2|0\nsub2_tab2|1\nsub2_tab2|2\nsub2_tab2|3"
+    ), "update of tab2 replicated"
+    result = node_subscriber2.safe_psql("SELECT c, a FROM tab3 ORDER BY 1, 2")
+    assert (
+        result == "sub2_tab3|0\nsub2_tab3|1\nsub2_tab3|2\nsub2_tab3|3"
+    ), "update of tab3 replicated"
+
+    # --- Phase 3: DELETE --------------------------------------------------
+    node_publisher.safe_psql("DELETE FROM tab1")
+    node_publisher.safe_psql("DELETE FROM tab2")
+    node_publisher.safe_psql("DELETE FROM tab3")
+    node_publisher.wait_for_catchup("sub_viaroot")
+    node_publisher.wait_for_catchup("sub2")
+    result = node_subscriber1.safe_psql("SELECT a FROM tab2")
+    assert result == "", "delete tab2 replicated"
+    result = node_subscriber2.safe_psql("SELECT a FROM tab1")
+    assert result == "", "delete from tab1 replicated"
+    result = node_subscriber2.safe_psql("SELECT a FROM tab2")
+    assert result == "", "delete from tab2 replicated"
+    result = node_subscriber2.safe_psql("SELECT a FROM tab3")
+    assert result == "", "delete from tab3 replicated"
+
+    # --- Phase 3: TRUNCATE ------------------------------------------------
+    # Truncating only partitions on the publisher must leave the subscriber
+    # rows in place; truncating the roots afterwards must empty them.
+    node_publisher.safe_psql("INSERT INTO tab1 VALUES (1), (2), (5)")
+    node_publisher.safe_psql("INSERT INTO tab2 VALUES (1), (2), (5)")
+    node_publisher.safe_psql("TRUNCATE tab1_2, tab2_1, tab3_1")
+    node_publisher.wait_for_catchup("sub_viaroot")
+    node_publisher.wait_for_catchup("sub2")
+    result = node_subscriber1.safe_psql("SELECT a FROM tab2 ORDER BY 1")
+    assert result == "1\n2\n5", "truncate of tab2_1 NOT replicated"
+    result = node_subscriber2.safe_psql("SELECT a FROM tab1 ORDER BY 1")
+    assert result == "1\n2\n5", "truncate of tab1_2 NOT replicated"
+    result = node_subscriber2.safe_psql("SELECT a FROM tab2 ORDER BY 1")
+    assert result == "1\n2\n5", "truncate of tab2_1 NOT replicated"
+    node_publisher.safe_psql("TRUNCATE tab1, tab2, tab3")
+    node_publisher.wait_for_catchup("sub_viaroot")
+    node_publisher.wait_for_catchup("sub2")
+    result = node_subscriber1.safe_psql("SELECT a FROM tab2")
+    assert result == "", "truncate of tab2 replicated"
+    result = node_subscriber2.safe_psql("SELECT a FROM tab1")
+    assert result == "", "truncate of tab1 replicated"
+    result = node_subscriber2.safe_psql("SELECT a FROM tab2")
+    assert result == "", "truncate of tab2 replicated"
+    result = node_subscriber2.safe_psql("SELECT a FROM tab3")
+    assert result == "", "truncate of tab3 replicated"
+    result = node_subscriber2.safe_psql("SELECT a FROM tab3_1")
+    assert result == "", "truncate of tab3_1 replicated"
+
+    # --- Phase 3: column changes on the publisher's tab2 -----------------
+    # Drop and re-add b and add c with a default; the subscribers must then
+    # show the publisher's value of c rather than their own default.
+    node_publisher.safe_psql(
+        "ALTER TABLE tab2 DROP b, ADD COLUMN c text DEFAULT 'pub_tab2', ADD b text"
+    )
+    node_publisher.safe_psql(
+        "INSERT INTO tab2 (a, b) VALUES (1, 'xxx'), (3, 'yyy'), (5, 'zzz')"
+    )
+    node_publisher.safe_psql("INSERT INTO tab2 (a, b, c) VALUES (6, 'aaa', 'xxx_c')")
+    node_publisher.wait_for_catchup("sub_viaroot")
+    node_publisher.wait_for_catchup("sub2")
+    result = node_subscriber1.safe_psql("SELECT c, a, b FROM tab2 ORDER BY 1, 2")
+    assert (
+        result == "pub_tab2|1|xxx\npub_tab2|3|yyy\npub_tab2|5|zzz\nxxx_c|6|aaa"
+    ), "inserts into tab2 replicated"
+    result = node_subscriber2.safe_psql("SELECT c, a, b FROM tab2 ORDER BY 1, 2")
+    assert (
+        result == "pub_tab2|1|xxx\npub_tab2|3|yyy\npub_tab2|5|zzz\nxxx_c|6|aaa"
+    ), "inserts into tab2 replicated"
+
+    # --- Phase 4: conflicts on subscriber1's tab2 ------------------------
+    # Same recipe as phase 2: delete locally, remember the log position,
+    # then change the rows on the publisher.
+    node_subscriber1.safe_psql("DELETE FROM tab2")
+    log_location = node_subscriber1.current_log_position()
+    node_publisher.safe_psql("UPDATE tab2 SET b = 'quux' WHERE a = 5")
+    node_publisher.safe_psql("DELETE FROM tab2 WHERE a = 1")
+    node_publisher.wait_for_catchup("sub_viaroot")
+    node_publisher.wait_for_catchup("sub2")
+    assert node_subscriber1.log_matches(
+        r"""conflict detected on relation "public.tab2_1": conflict=update_missing.*\n.*DETAIL:.* Could not find the row to be updated: remote row \(pub_tab2, quux, 5\), replica identity \(a\)=\(5\)""",
+        log_location,
+    ), "update target row is missing in tab2_1"
+    assert node_subscriber1.log_matches(
+        r"""conflict detected on relation "public.tab2_1": conflict=delete_missing.*\n.*DETAIL:.* Could not find the row to be deleted: replica identity \(a\)=\(1\)""",
+        log_location,
+    ), "delete target row is missing in tab2_1"
+    # Turn track_commit_timestamp on (restart required here), insert a row
+    # locally on subscriber1 and update the same key on the publisher.
+    node_subscriber1.append_conf("track_commit_timestamp = on")
+    node_subscriber1.restart()
+    node_subscriber1.safe_psql("INSERT INTO tab2 VALUES (3, 'yyy')")
+    node_publisher.safe_psql("UPDATE tab2 SET b = 'quux' WHERE a = 3")
+    node_publisher.wait_for_catchup("sub_viaroot")
+    # log_location is not refreshed, so the search still starts at the
+    # position recorded before the two checks above.
+    assert node_subscriber1.log_matches(
+        r"""conflict detected on relation "public.tab2_1": conflict=update_origin_differs.*\n.*DETAIL:.* Updating the row that was modified locally in transaction [0-9]+ at .*: local row \(yyy, null, 3\), remote row \(pub_tab2, quux, 3\), replica identity \(a\)=\(3\).""",
+        log_location,
+    ), "updating a row that was modified by a different origin"
+    # Restore the setting for the remainder of the test.
+    node_subscriber1.append_conf("track_commit_timestamp = off")
+    node_subscriber1.restart()
+
+    # --- Phase 5: tab5, plain on the publisher, partitioned on sub2 ------
+    # Both sides use a unique index on a as replica identity; subscriber2
+    # has an extra column c and a single default partition tab5_1.
+    node_publisher.safe_psql(
+        "CREATE TABLE tab5 (a int NOT NULL, b int);\n\tCREATE UNIQUE INDEX tab5_a_idx ON tab5 (a);\n\tALTER TABLE tab5 REPLICA IDENTITY USING INDEX tab5_a_idx;"
+    )
+    node_subscriber2.safe_psql(
+        "CREATE TABLE tab5 (a int NOT NULL, b int, c int) PARTITION BY LIST (a);\n\tCREATE TABLE tab5_1 PARTITION OF tab5 DEFAULT;\n\tCREATE UNIQUE INDEX tab5_a_idx ON tab5 (a);\n\tALTER TABLE tab5 REPLICA IDENTITY USING INDEX tab5_a_idx;\n\tALTER TABLE tab5_1 REPLICA IDENTITY USING INDEX tab5_1_a_idx;"
+    )
+    # Pick up the new table in the existing subscription.
+    node_subscriber2.safe_psql("ALTER SUBSCRIPTION sub2 REFRESH PUBLICATION")
+    node_subscriber2.wait_for_subscription_sync()
+    node_publisher.safe_psql("INSERT INTO tab5 VALUES (1, 1)")
+    node_publisher.safe_psql("UPDATE tab5 SET a = 2 WHERE a = 1")
+    node_publisher.wait_for_catchup("sub2")
+    result = node_subscriber2.safe_psql("SELECT a, b FROM tab5 ORDER BY 1")
+    assert result == "2|1", "updates of tab5 replicated correctly"
+    # Detach the partition, drop and re-add its column b, and attach it
+    # again; a further update must still replicate.
+    node_subscriber2.safe_psql(
+        "ALTER TABLE tab5 DETACH PARTITION tab5_1;\n\tALTER TABLE tab5_1 DROP COLUMN b;\n\tALTER TABLE tab5_1 ADD COLUMN b int;\n\tALTER TABLE tab5 ATTACH PARTITION tab5_1 DEFAULT"
+    )
+    node_publisher.safe_psql("UPDATE tab5 SET a = 3 WHERE a = 2")
+    node_publisher.wait_for_catchup("sub2")
+    result = node_subscriber2.safe_psql("SELECT a, b, c FROM tab5 ORDER BY 1")
+    assert (
+        result == "3|1|"
+    ), "updates of tab5 replicated correctly after altering table on subscriber"
+    # Now change the publisher's columns instead: drop b, add c, re-add b.
+    node_publisher.safe_psql(
+        "ALTER TABLE tab5 DROP COLUMN b, ADD COLUMN c INT;\n\tALTER TABLE tab5 ADD COLUMN b INT;"
+    )
+    node_publisher.safe_psql("UPDATE tab5 SET c = 1 WHERE a = 3")
+    node_publisher.wait_for_catchup("sub2")
+    result = node_subscriber2.safe_psql("SELECT a, b, c FROM tab5 ORDER BY 1")
+    assert (
+        result == "3||1"
+    ), "updates of tab5 replicated correctly after altering table on publisher"
+    # Remove the replica identity from the subscriber's root table only; the
+    # update is still expected to reach the partition tab5_1.
+    node_subscriber2.safe_psql("ALTER TABLE tab5 REPLICA IDENTITY NOTHING")
+    node_publisher.safe_psql("UPDATE tab5 SET a = 4 WHERE a = 3")
+    node_publisher.wait_for_catchup("sub2")
+    result = node_subscriber2.safe_psql("SELECT a, b, c FROM tab5_1 ORDER BY 1")
+    assert result == "4||1", "updates of tab5 replicated correctly"
diff --git a/src/test/subscription/pyt/test_014_binary.py
b/src/test/subscription/pyt/test_014_binary.py new file mode 100644 index 0000000000000..cf749963be5c7 --- /dev/null +++ b/src/test/subscription/pyt/test_014_binary.py @@ -0,0 +1,177 @@ +# Copyright (c) 2021-2026, PostgreSQL Global Development Group + +"""Port of src/test/subscription/t/014_binary.pl. + +Binary mode logical replication. +""" + +_DDL = """ +CREATE TABLE public.test_numerical ( + a INTEGER PRIMARY KEY, + b NUMERIC, + c FLOAT, + d BIGINT + ); +CREATE TABLE public.test_arrays ( + a INTEGER[] PRIMARY KEY, + b NUMERIC[], + c TEXT[] + ); +""" + +_SYNC_CHECK = ( + "SELECT a, b, c, d FROM test_numerical ORDER BY a;\n" + "SELECT a, b, c FROM test_arrays ORDER BY a;" +) + + +def _setup_custom_type(publisher, subscriber): + """Custom type without binary send/recv first fails, then succeeds.""" + ddl = ( + "CREATE TYPE myvarchar;\n" + "CREATE FUNCTION myvarcharin(cstring, oid, integer) RETURNS myvarchar " + "LANGUAGE internal IMMUTABLE PARALLEL SAFE STRICT AS 'varcharin';\n" + "CREATE FUNCTION myvarcharout(myvarchar) RETURNS cstring " + "LANGUAGE internal IMMUTABLE PARALLEL SAFE STRICT AS 'varcharout';\n" + "CREATE TYPE myvarchar (input = myvarcharin, output = myvarcharout);\n" + "CREATE TABLE public.test_myvarchar (a myvarchar);" + ) + publisher.safe_psql(ddl) + subscriber.safe_psql(ddl) + publisher.safe_psql("INSERT INTO public.test_myvarchar (a) VALUES ('a');") + + offset = subscriber.current_log_position() + subscriber.safe_psql("ALTER SUBSCRIPTION tsub REFRESH PUBLICATION") + subscriber.wait_for_log( + r"ERROR: ( [A-Z0-9]+:)? 
no binary input function available for type", offset + ) + + sendrecv = ( + "CREATE FUNCTION myvarcharsend(myvarchar) RETURNS bytea " + "LANGUAGE internal STABLE PARALLEL SAFE STRICT AS 'varcharsend';\n" + "CREATE FUNCTION myvarcharrecv(internal, oid, integer) RETURNS myvarchar " + "LANGUAGE internal STABLE PARALLEL SAFE STRICT AS 'varcharrecv';\n" + "ALTER TYPE myvarchar SET (send = myvarcharsend, receive = myvarcharrecv);" + ) + publisher.safe_psql(sendrecv) + subscriber.safe_psql(sendrecv) + subscriber.wait_for_subscription_sync(publisher, "tsub") + assert ( + subscriber.safe_psql("SELECT a FROM test_myvarchar;") == "a" + ), "check synced data on subscriber with custom type" + + +def _test_mismatched_types(publisher, subscriber): + """Type mismatch fails in binary mode but syncs once binary is disabled.""" + publisher.safe_psql( + "CREATE TABLE public.test_mismatching_types (a bigint PRIMARY KEY);\n" + "INSERT INTO public.test_mismatching_types (a) VALUES (1), (2);" + ) + offset = subscriber.current_log_position() + subscriber.safe_psql( + "CREATE TABLE public.test_mismatching_types (a int PRIMARY KEY);\n" + "ALTER SUBSCRIPTION tsub REFRESH PUBLICATION;" + ) + subscriber.wait_for_log( + r"ERROR: ( [A-Z0-9]+:)? incorrect binary data format", offset + ) + + pub_offset = publisher.current_log_position() + subscriber.safe_psql("ALTER SUBSCRIPTION tsub SET (binary = false);") + publisher.wait_for_log( + r"LOG: ( [A-Z0-9]+:)? statement: COPY (.+)? 
def test_binary(create_pg):
    """Check table sync and apply for a subscription created with binary = true.

    Steps, in order: initial COPY (the publisher log must show
    ``FORMAT binary``), replication of INSERTs and UPDATEs, switching the
    subscription to binary = false and back, then the custom-type and
    mismatched-type scenarios implemented by the module's helper functions.
    """
    publisher = create_pg("publisher", allows_streaming="logical")
    subscriber = create_pg("subscriber")

    def numerical_rows():
        return subscriber.safe_psql(
            "SELECT a, b, c, d FROM test_numerical ORDER BY a"
        )

    def array_rows():
        return subscriber.safe_psql("SELECT a, b, c FROM test_arrays ORDER BY a")

    for node in (publisher, subscriber):
        node.safe_psql(_DDL)
    publisher.safe_psql("CREATE PUBLICATION tpub FOR ALL TABLES")

    seed_rows = (
        "INSERT INTO public.test_numerical (a, b, c, d) VALUES "
        "(1, 1.2, 1.3, 10), (2, 2.2, 2.3, 20);\n"
        "INSERT INTO public.test_arrays (a, b, c) VALUES "
        "('{1,2,3}', '{1.1, 1.2, 1.3}', '{\"one\", \"two\", \"three\"}'), "
        "('{3,1,2}', '{1.3, 1.1, 1.2}', '{\"three\", \"one\", \"two\"}');"
    )
    publisher.safe_psql(seed_rows)

    conninfo = publisher.connstr() + " dbname=postgres"
    create_sub = (
        "CREATE SUBSCRIPTION tsub CONNECTION '{}' PUBLICATION tpub "
        "WITH (slot_name = tpub_slot, binary = true)"
    )
    subscriber.safe_psql(create_sub.format(conninfo))

    # The initial table copy has to be issued in binary format.
    binary_copy = (
        r"LOG: ( [A-Z0-9]+:)? statement: COPY (.+)? TO STDOUT WITH \(FORMAT binary\)"
    )
    publisher.wait_for_log(binary_copy)
    subscriber.wait_for_subscription_sync(publisher, "tsub")
    expected_sync = (
        "1|1.2|1.3|10\n2|2.2|2.3|20\n"
        "{1,2,3}|{1.1,1.2,1.3}|{one,two,three}\n"
        "{3,1,2}|{1.3,1.1,1.2}|{three,one,two}"
    )
    assert (
        subscriber.safe_psql(_SYNC_CHECK) == expected_sync
    ), "check synced data on subscriber"

    # Apply of new rows while binary = true.
    publisher.safe_psql(
        "INSERT INTO public.test_arrays (a, b, c) VALUES "
        "('{2,1,3}', '{1.2, 1.1, 1.3}', '{\"two\", \"one\", \"three\"}'), "
        "('{1,3,2}', '{1.1, 1.3, 1.2}', '{\"one\", \"three\", \"two\"}');\n"
        "INSERT INTO public.test_numerical (a, b, c, d) VALUES "
        "(3, 3.2, 3.3, 30), (4, 4.2, 4.3, 40);"
    )
    publisher.wait_for_catchup("tsub")
    assert (
        numerical_rows() == "1|1.2|1.3|10\n2|2.2|2.3|20\n3|3.2|3.3|30\n4|4.2|4.3|40"
    ), "check replicated data on subscriber"

    # Apply of updates, including NULLs.
    publisher.safe_psql(
        "UPDATE public.test_arrays SET b[1] = 42, c = NULL;\n"
        "UPDATE public.test_numerical SET b = 42, c = NULL;"
    )
    publisher.wait_for_catchup("tsub")
    expected_arrays = (
        "{1,2,3}|{42,1.2,1.3}|\n{1,3,2}|{42,1.3,1.2}|\n"
        "{2,1,3}|{42,1.1,1.3}|\n{3,1,2}|{42,1.1,1.2}|"
    )
    assert (
        array_rows() == expected_arrays
    ), "check updated replicated data on subscriber"
    assert (
        numerical_rows() == "1|42||10\n2|42||20\n3|42||30\n4|42||40"
    ), "check updated replicated data on subscriber"

    # Text format ...
    subscriber.safe_psql("ALTER SUBSCRIPTION tsub SET (binary = false);")
    publisher.safe_psql(
        "INSERT INTO public.test_numerical (a, b, c, d) VALUES (5, 5.2, 5.3, 50);"
    )
    publisher.wait_for_catchup("tsub")
    assert (
        numerical_rows() == "1|42||10\n2|42||20\n3|42||30\n4|42||40\n5|5.2|5.3|50"
    ), "check replicated data on subscriber"

    # ... and back to binary.
    subscriber.safe_psql("ALTER SUBSCRIPTION tsub SET (binary = true);")
    publisher.safe_psql(
        "INSERT INTO public.test_arrays (a, b, c) VALUES "
        "('{2,3,1}', '{1.2, 1.3, 1.1}', '{\"two\", \"three\", \"one\"}');"
    )
    publisher.wait_for_catchup("tsub")
    expected_arrays = (
        "{1,2,3}|{42,1.2,1.3}|\n{1,3,2}|{42,1.3,1.2}|\n"
        "{2,1,3}|{42,1.1,1.3}|\n{2,3,1}|{1.2,1.3,1.1}|{two,three,one}\n"
        "{3,1,2}|{42,1.1,1.2}|"
    )
    assert array_rows() == expected_arrays, "check replicated data on subscriber"

    _setup_custom_type(publisher, subscriber)
    _test_mismatched_types(publisher, subscriber)

    subscriber.stop("fast")
    publisher.stop("fast")
def _check_parallel_log(subscriber, offset, is_parallel, kind):
    """Wait for the "finished processing the STREAM <kind>" line if parallel.

    When ``is_parallel`` is false this is a no-op.  Otherwise the subscriber
    log is waited on, using ``offset`` as passed to ``wait_for_log``.
    """
    if not is_parallel:
        return
    subscriber.wait_for_log(_PARALLEL_FINISHED.format(kind), offset)


def _test_streaming(publisher, subscriber, appname, is_parallel):
    """Run the streaming checks shared by streaming=on and streaming=parallel.

    Covers, in order: two interleaved large transactions (one of them driven
    through a background psql session), a large transaction with the
    subscription in binary mode, and a large UPDATE after the subscriber's
    extra column ``c`` was changed locally.  The table is trimmed back to
    ``a <= 2`` on the publisher before returning.

    ``is_parallel`` only controls whether the parallel apply worker's
    "finished processing" log line is awaited after each transaction.
    """
    handle = publisher.background_psql("postgres", on_error_stop=False)
    # The session holds an open transaction; make sure it is closed even when
    # one of the statements below fails, instead of leaking it.
    try:
        offset = subscriber.current_log_position()
        handle.query_safe(
            "BEGIN;\n"
            "INSERT INTO test_tab SELECT i, sha256(i::text::bytea) "
            "FROM generate_series(3, 5000) s(i);\n"
            "UPDATE test_tab SET b = sha256(b) WHERE mod(a,2) = 0;\n"
            "DELETE FROM test_tab WHERE mod(a,3) = 0;"
        )
        # A second transaction that commits while the first is still open.
        publisher.safe_psql(
            "BEGIN;\n"
            "INSERT INTO test_tab SELECT i, sha256(i::text::bytea) "
            "FROM generate_series(5001, 9999) s(i);\n"
            "DELETE FROM test_tab WHERE a > 5000;\n"
            "COMMIT;"
        )
        handle.query_safe("COMMIT")
    finally:
        handle.quit()

    publisher.wait_for_catchup(appname)
    _check_parallel_log(subscriber, offset, is_parallel, "COMMIT")
    assert (
        subscriber.safe_psql("SELECT count(*), count(c), count(d = 999) FROM test_tab")
        == "3334|3334|3334"
    ), "check extra columns contain local defaults"

    # Test streaming in binary mode.
    subscriber.safe_psql("ALTER SUBSCRIPTION tap_sub SET (binary = on)")
    offset = subscriber.current_log_position()
    publisher.safe_psql(
        "BEGIN;\n"
        "INSERT INTO test_tab SELECT i, sha256(i::text::bytea) "
        "FROM generate_series(5001, 10000) s(i);\n"
        "UPDATE test_tab SET b = sha256(b) WHERE mod(a,2) = 0;\n"
        "DELETE FROM test_tab WHERE mod(a,3) = 0;\n"
        "COMMIT;"
    )
    publisher.wait_for_catchup(appname)
    _check_parallel_log(subscriber, offset, is_parallel, "COMMIT")
    assert (
        subscriber.safe_psql("SELECT count(*), count(c), count(d = 999) FROM test_tab")
        == "6667|6667|6667"
    ), "check extra columns contain local defaults"

    # Locally changed extra columns must be retained after a streaming txn.
    subscriber.safe_psql(
        "UPDATE test_tab SET c = 'epoch'::timestamptz + 987654321 * interval '1s'"
    )
    offset = subscriber.current_log_position()
    publisher.safe_psql("UPDATE test_tab SET b = sha256(a::text::bytea)")
    publisher.wait_for_catchup(appname)
    _check_parallel_log(subscriber, offset, is_parallel, "COMMIT")
    assert (
        subscriber.safe_psql(
            "SELECT count(*), count(extract(epoch from c) = 987654321), "
            "count(d = 999) FROM test_tab"
        )
        == "6667|6667|6667"
    ), "check extra columns contain locally changed data"

    # Leave only the two initial rows behind for the next caller.
    publisher.safe_psql("DELETE FROM test_tab WHERE (a > 2)")
    publisher.wait_for_catchup(appname)
DEFAULT 999)" + ) + subscriber.safe_psql("CREATE TABLE test_tab_2 (a int)") + subscriber.safe_psql("CREATE UNIQUE INDEX idx_tab on test_tab_2(a)") + + connstr = publisher.connstr() + " dbname=postgres" + publisher.safe_psql("CREATE PUBLICATION tap_pub FOR TABLE test_tab, test_tab_2") + appname = "tap_sub" + subscriber.safe_psql( + "CREATE SUBSCRIPTION tap_sub CONNECTION " + "'{} application_name={}' PUBLICATION tap_pub " + "WITH (streaming = on)".format(connstr, appname) + ) + subscriber.wait_for_subscription_sync(publisher, appname) + return publisher, subscriber, appname + + +def _switch_to_parallel(publisher, subscriber, appname): + """Switch the subscription to streaming=parallel and bump log verbosity.""" + oldpid = publisher.safe_psql( + "SELECT pid FROM pg_stat_replication " + "WHERE application_name = '{}' AND state = 'streaming';".format(appname) + ) + subscriber.safe_psql( + "ALTER SUBSCRIPTION tap_sub SET(streaming = parallel, binary = off)" + ) + assert publisher.poll_query_until( + "SELECT pid != {} FROM pg_stat_replication " + "WHERE application_name = '{}' AND state = 'streaming';".format(oldpid, appname) + ), "Timed out while waiting for apply to restart after changing SUBSCRIPTION" + subscriber.append_conf("log_min_messages = debug1") + subscriber.reload() + subscriber.safe_psql("SELECT 1") + + +def _deadlock_round(publisher, subscriber, appname, conflicting_insert, expected): + """Drive one deadlock-detection round and verify the post-recovery count.""" + offset = subscriber.current_log_position() + handle = publisher.background_psql("postgres", on_error_stop=False) + handle.query_safe( + "BEGIN;\nINSERT INTO test_tab_2 SELECT i FROM generate_series(1, 5000) s(i);" + ) + subscriber.wait_for_log(_APPLIED_CHUNK, offset) + publisher.safe_psql(conflicting_insert) + handle.query_safe("COMMIT") + handle.quit() + subscriber.wait_for_log(_DEADLOCK, offset) + + # Drop the unique index so both transactions can complete without conflict. 
def _test_deadlocks(publisher, subscriber, appname):
    """Run two deadlock rounds against ``test_tab_2`` with a 10ms deadlock timeout.

    The first round uses a single-row conflicting INSERT and expects 5001 rows
    afterwards.  The table is then truncated, the unique index re-created on
    the subscriber, and a second round uses a 5000-row conflicting INSERT and
    expects 10000 rows.
    """
    subscriber.append_conf("deadlock_timeout = 10ms")
    subscriber.reload()
    subscriber.safe_psql("SELECT 1")

    single_row_insert = "INSERT INTO test_tab_2 values(1)"
    _deadlock_round(publisher, subscriber, appname, single_row_insert, "5001")

    # Reset the table and restore the unique index dropped by the round above.
    publisher.safe_psql("TRUNCATE TABLE test_tab_2")
    publisher.wait_for_catchup(appname)
    subscriber.safe_psql("CREATE UNIQUE INDEX idx_tab on test_tab_2(a)")

    bulk_insert = "INSERT INTO test_tab_2 SELECT i FROM generate_series(1, 5000) s(i)"
    _deadlock_round(publisher, subscriber, appname, bulk_insert, "10000")


def _test_serialize_to_file(publisher, subscriber, appname):
    """Check that a streamed transaction is serialized to a file and applied.

    The subscriber is switched to ``debug_logical_replication_streaming =
    immediate`` and ``log_min_messages = warning``; after a 5000-row INSERT on
    the publisher the subscriber log must contain the "will serialize" message
    and ``test_tab_2`` must hold 15000 rows.
    """
    for setting in (
        "debug_logical_replication_streaming = immediate",
        "log_min_messages = warning",
    ):
        subscriber.append_conf(setting)
    subscriber.reload()
    subscriber.safe_psql("SELECT 1")

    log_start = subscriber.current_log_position()
    publisher.safe_psql(
        "INSERT INTO test_tab_2 SELECT i FROM generate_series(1, 5000) s(i)"
    )
    subscriber.wait_for_log(_SERIALIZE, log_start)
    publisher.wait_for_catchup(appname)

    row_count = subscriber.safe_psql("SELECT count(*) FROM test_tab_2")
    assert row_count == "15000", "parallel apply worker replayed all changes from file"
def _test_streaming(publisher, subscriber, appname, is_parallel):
    """Replay the savepoint workload and verify the subscriber's row counts.

    Runs ``_WORKLOAD`` on the publisher and waits for ``appname`` to catch up.
    In parallel mode it also waits for the "finished processing the STREAM
    COMMIT command" log line.  It then expects 12 rows on the subscriber and
    finally deletes every row with ``a > 2`` on the publisher.
    """
    log_start = subscriber.current_log_position()

    publisher.safe_psql(_WORKLOAD)
    publisher.wait_for_catchup(appname)

    if is_parallel:
        commit_done = (
            r"DEBUG: ( [A-Z0-9]+:)? finished processing the STREAM COMMIT command"
        )
        subscriber.wait_for_log(commit_done, log_start)

    counts = subscriber.safe_psql(
        "SELECT count(*), count(c), count(d = 999) FROM test_tab"
    )
    assert (
        counts == "12|12|12"
    ), "streaming mode copied data; extra columns get local defaults"

    # Reset to the two initial rows.
    publisher.safe_psql("DELETE FROM test_tab WHERE (a > 2)")
    publisher.wait_for_catchup(appname)
+ oldpid = publisher.safe_psql( + "SELECT pid FROM pg_stat_replication " + "WHERE application_name = '{}' AND state = 'streaming';".format(appname) + ) + subscriber.safe_psql("ALTER SUBSCRIPTION tap_sub SET(streaming = parallel)") + assert publisher.poll_query_until( + "SELECT pid != {} FROM pg_stat_replication " + "WHERE application_name = '{}' AND state = 'streaming';".format(oldpid, appname) + ), "apply restarted after changing SUBSCRIPTION" + + # Bump log verbosity so the parallel apply worker's DEBUG lines appear. + subscriber.append_conf("log_min_messages = debug1") + subscriber.reload() + subscriber.safe_psql("SELECT 1") + + _test_streaming(publisher, subscriber, appname, True) diff --git a/src/test/subscription/pyt/test_017_stream_ddl.py b/src/test/subscription/pyt/test_017_stream_ddl.py new file mode 100644 index 0000000000000..d0ee2f6cb9c4f --- /dev/null +++ b/src/test/subscription/pyt/test_017_stream_ddl.py @@ -0,0 +1,100 @@ +# Copyright (c) 2021-2026, PostgreSQL Global Development Group + +"""Port of src/test/subscription/t/017_stream_ddl.pl. + +Streaming of large transactions with DDL and subtransactions. 
def test_stream_ddl(create_pg):
    """Replicate small and large transactions that mix DML with ALTER TABLE.

    The publisher runs with ``logical_decoding_work_mem = 64kB``.  The
    subscriber's ``test_tab`` already has columns c..f, while the publisher
    adds them one at a time inside the transactions; the per-column non-NULL
    counts on the subscriber are checked after each wave of transactions.
    """
    publisher = create_pg("publisher", allows_streaming="logical", start=False)
    publisher.append_conf("logical_decoding_work_mem = 64kB")
    publisher.start()
    subscriber = create_pg("subscriber")

    publisher.safe_psql("CREATE TABLE test_tab (a int primary key, b varchar)")
    publisher.safe_psql("INSERT INTO test_tab VALUES (1, 'foo'), (2, 'bar')")
    subscriber.safe_psql(
        "CREATE TABLE test_tab "
        "(a int primary key, b bytea, c INT, d INT, e INT, f INT)"
    )

    conninfo = publisher.connstr() + " dbname=postgres"
    publisher.safe_psql("CREATE PUBLICATION tap_pub FOR TABLE test_tab")
    appname = "tap_sub"
    create_sub = (
        "CREATE SUBSCRIPTION tap_sub CONNECTION "
        "'{} application_name={}' PUBLICATION tap_pub "
        "WITH (streaming = on)"
    )
    subscriber.safe_psql(create_sub.format(conninfo, appname))

    subscriber.wait_for_subscription_sync(publisher, appname)
    initial = subscriber.safe_psql(
        "SELECT count(*), count(c), count(d = 999) FROM test_tab"
    )
    assert initial == "2|0|0", "check initial data was copied to subscriber"

    # Small (non-streamed) txn with DDL and DML.
    small_txn_add_c = (
        "BEGIN;\n"
        "INSERT INTO test_tab VALUES (3, sha256(3::text::bytea));\n"
        "ALTER TABLE test_tab ADD COLUMN c INT;\n"
        "SAVEPOINT s1;\n"
        "INSERT INTO test_tab VALUES (4, sha256(4::text::bytea), -4);\n"
        "COMMIT;\n"
    )
    # Large (streamed) txn with DDL and DML.
    large_txn_add_d = (
        "BEGIN;\n"
        "INSERT INTO test_tab SELECT i, sha256(i::text::bytea), -i "
        "FROM generate_series(5, 1000) s(i);\n"
        "ALTER TABLE test_tab ADD COLUMN d INT;\n"
        "SAVEPOINT s1;\n"
        "INSERT INTO test_tab SELECT i, sha256(i::text::bytea), -i, 2*i "
        "FROM generate_series(1001, 2000) s(i);\n"
        "COMMIT;\n"
    )
    # Small (non-streamed) txn with DDL and DML.
    small_txn_add_e = (
        "BEGIN;\n"
        "INSERT INTO test_tab VALUES (2001, sha256(2001::text::bytea), -2001, "
        "2*2001);\n"
        "ALTER TABLE test_tab ADD COLUMN e INT;\n"
        "SAVEPOINT s1;\n"
        "INSERT INTO test_tab VALUES (2002, sha256(2002::text::bytea), -2002, "
        "2*2002, -3*2002);\n"
        "COMMIT;\n"
    )
    for txn in (small_txn_add_c, large_txn_add_d, small_txn_add_e):
        publisher.safe_psql(txn)

    publisher.wait_for_catchup(appname)
    counts = subscriber.safe_psql(
        "SELECT count(*), count(c), count(d), count(e) FROM test_tab"
    )
    assert (
        counts == "2002|1999|1002|1"
    ), "streaming mode copied; extra columns get local defaults"

    # Large (streamed) txn with a DDL after DML, invalidating the sent schema.
    large_txn_add_f = (
        "BEGIN;\n"
        "INSERT INTO test_tab SELECT i, sha256(i::text::bytea), -i, 2*i, -3*i "
        "FROM generate_series(2003,5000) s(i);\n"
        "ALTER TABLE test_tab ADD COLUMN f INT;\n"
        "COMMIT;\n"
    )
    # Small txn to force the schema to be sent again with the new column.
    small_txn_with_f = (
        "BEGIN;\n"
        "INSERT INTO test_tab SELECT i, sha256(i::text::bytea), -i, 2*i, -3*i, 4*i "
        "FROM generate_series(5001,5005) s(i);\n"
        "COMMIT;\n"
    )
    for txn in (large_txn_add_f, small_txn_with_f):
        publisher.safe_psql(txn)

    publisher.wait_for_catchup(appname)
    counts = subscriber.safe_psql(
        "SELECT count(*), count(c), count(d), count(e), count(f) FROM test_tab"
    )
    assert (
        counts == "5005|5002|4005|3004|5"
    ), "data copied for both streaming and non-streaming transactions"
logical replication apply worker will serialize the " + r"remaining changes of remote transaction \d+ to a file" +) + + +def _count(node): + return node.safe_psql("SELECT count(*), count(c) FROM test_tab") + + +def _wait_parallel(subscriber, offset, is_parallel, type_): + if is_parallel: + subscriber.wait_for_log( + r"DEBUG: ( [A-Z0-9]+:)? finished processing the STREAM {} " + r"command".format(type_), + offset, + ) + + +def _test_streaming(publisher, subscriber, appname, is_parallel): + offset = subscriber.current_log_position() + publisher.safe_psql( + "BEGIN;\n" + "INSERT INTO test_tab VALUES (3, sha256(3::text::bytea));\n" + "SAVEPOINT s1;\n" + "INSERT INTO test_tab VALUES (4, sha256(4::text::bytea));\n" + "SAVEPOINT s2;\n" + "INSERT INTO test_tab VALUES (5, sha256(5::text::bytea));\n" + "SAVEPOINT s3;\n" + "INSERT INTO test_tab VALUES (6, sha256(6::text::bytea));\n" + "ROLLBACK TO s2;\n" + "INSERT INTO test_tab VALUES (7, sha256(7::text::bytea));\n" + "ROLLBACK TO s1;\n" + "INSERT INTO test_tab VALUES (8, sha256(8::text::bytea));\n" + "SAVEPOINT s4;\n" + "INSERT INTO test_tab VALUES (9, sha256(9::text::bytea));\n" + "SAVEPOINT s5;\n" + "INSERT INTO test_tab VALUES (10, sha256(10::text::bytea));\n" + "COMMIT;\n" + ) + publisher.wait_for_catchup(appname) + _wait_parallel(subscriber, offset, is_parallel, "COMMIT") + assert ( + _count(subscriber) == "6|0" + ), "rollback to savepoint reflected; local defaults" + + offset = subscriber.current_log_position() + publisher.safe_psql( + "BEGIN;\n" + "INSERT INTO test_tab VALUES (11, sha256(11::text::bytea));\n" + "SAVEPOINT s1;\n" + "INSERT INTO test_tab VALUES (12, sha256(12::text::bytea));\n" + "SAVEPOINT s2;\n" + "INSERT INTO test_tab VALUES (13, sha256(13::text::bytea));\n" + "SAVEPOINT s3;\n" + "INSERT INTO test_tab VALUES (14, sha256(14::text::bytea));\n" + "RELEASE s2;\n" + "INSERT INTO test_tab VALUES (15, sha256(15::text::bytea));\n" + "ROLLBACK TO s1;\n" + "COMMIT;\n" + ) + publisher.wait_for_catchup(appname) + 
def _test_serialize(publisher, subscriber, appname):
    """Check rollbacks of transactions whose changes were serialized to a file.

    The subscriber is set to ``debug_logical_replication_streaming =
    immediate`` and ``log_min_messages = warning``.  A fully rolled-back
    transaction must leave ``test_tab_2`` empty, and a transaction with a
    ROLLBACK TO savepoint must leave exactly one row; for both, the "will
    serialize" message is awaited in the subscriber log.
    """
    for setting in (
        "debug_logical_replication_streaming = immediate",
        "log_min_messages = warning",
    ):
        subscriber.append_conf(setting)
    subscriber.reload()
    subscriber.safe_psql("SELECT 1")

    def rows_in_tab_2():
        return subscriber.safe_psql("SELECT count(*) FROM test_tab_2")

    # Whole transaction rolled back.
    log_start = subscriber.current_log_position()
    publisher.safe_psql("BEGIN;\nINSERT INTO test_tab_2 values(1);\nROLLBACK;\n")
    subscriber.wait_for_log(_SERIALIZE_LOG, log_start)
    publisher.wait_for_catchup(appname)
    assert rows_in_tab_2() == "0", "rollback was reflected on subscriber"

    # Only the part after the savepoint rolled back.
    log_start = subscriber.current_log_position()
    savepoint_txn = (
        "BEGIN;\n"
        "INSERT INTO test_tab_2 values(1);\n"
        "SAVEPOINT sp;\n"
        "INSERT INTO test_tab_2 values(1);\n"
        "ROLLBACK TO sp;\n"
        "COMMIT;\n"
    )
    publisher.safe_psql(savepoint_txn)
    subscriber.wait_for_log(_SERIALIZE_LOG, log_start)
    publisher.wait_for_catchup(appname)
    assert (
        rows_in_tab_2() == "1"
    ), "rollback to savepoint was reflected on subscriber"
create_pg("publisher", allows_streaming="logical", start=False) + publisher.append_conf("debug_logical_replication_streaming = immediate") + publisher.start() + subscriber = create_pg("subscriber") + + publisher.safe_psql("CREATE TABLE test_tab (a int primary key, b bytea)") + publisher.safe_psql("INSERT INTO test_tab VALUES (1, 'foo'), (2, 'bar')") + publisher.safe_psql("CREATE TABLE test_tab_2 (a int)") + subscriber.safe_psql( + "CREATE TABLE test_tab (a int primary key, b text, c INT, d INT, e INT)" + ) + subscriber.safe_psql("CREATE TABLE test_tab_2 (a int)") + + connstr = publisher.connstr() + " dbname=postgres" + publisher.safe_psql("CREATE PUBLICATION tap_pub FOR TABLE test_tab, test_tab_2") + appname = "tap_sub" + + # Streaming mode 'on'. + subscriber.safe_psql( + "CREATE SUBSCRIPTION tap_sub CONNECTION " + "'{} application_name={}' PUBLICATION tap_pub " + "WITH (streaming = on)".format(connstr, appname) + ) + subscriber.wait_for_subscription_sync(publisher, appname) + assert _count(subscriber) == "2|0", "check initial data was copied to subscriber" + + _test_streaming(publisher, subscriber, appname, False) + + # Streaming mode 'parallel'. 
+ oldpid = publisher.safe_psql( + "SELECT pid FROM pg_stat_replication " + "WHERE application_name = '{}' AND state = 'streaming';".format(appname) + ) + subscriber.safe_psql("ALTER SUBSCRIPTION tap_sub SET(streaming = parallel)") + assert publisher.poll_query_until( + "SELECT pid != {} FROM pg_stat_replication " + "WHERE application_name = '{}' AND state = 'streaming';".format(oldpid, appname) + ), "apply restarted after changing SUBSCRIPTION" + + subscriber.append_conf("log_min_messages = debug1") + subscriber.reload() + subscriber.safe_psql("SELECT 1") + + _test_streaming(publisher, subscriber, appname, True) + _test_serialize(publisher, subscriber, appname) diff --git a/src/test/subscription/pyt/test_019_stream_subxact_ddl_abort.py b/src/test/subscription/pyt/test_019_stream_subxact_ddl_abort.py new file mode 100644 index 0000000000000..5476770c31b9b --- /dev/null +++ b/src/test/subscription/pyt/test_019_stream_subxact_ddl_abort.py @@ -0,0 +1,58 @@ +# Copyright (c) 2021-2026, PostgreSQL Global Development Group + +"""Port of src/test/subscription/t/019_stream_subxact_ddl_abort.pl. + +Streaming of a transaction with subtransactions, DDLs, DMLs and rollbacks. 
def test_stream_subxact_ddl_abort(create_pg):
    """Stream one transaction mixing DDL, DML, savepoints and ROLLBACK TO.

    The publisher runs with ``debug_logical_replication_streaming =
    immediate``.  After the transaction commits, the subscriber is expected
    to hold 4 rows, exactly one of which has a non-NULL ``c``.
    """
    publisher = create_pg("publisher", allows_streaming="logical", start=False)
    publisher.append_conf("debug_logical_replication_streaming = immediate")
    publisher.start()
    subscriber = create_pg("subscriber")

    def counts():
        return subscriber.safe_psql("SELECT count(*), count(c) FROM test_tab")

    publisher.safe_psql("CREATE TABLE test_tab (a int primary key, b bytea)")
    publisher.safe_psql("INSERT INTO test_tab VALUES (1, 'foo'), (2, 'bar')")
    subscriber.safe_psql(
        "CREATE TABLE test_tab (a int primary key, b bytea, c INT, d INT, e INT)"
    )

    conninfo = publisher.connstr() + " dbname=postgres"
    publisher.safe_psql("CREATE PUBLICATION tap_pub FOR TABLE test_tab")
    appname = "tap_sub"
    create_sub = (
        "CREATE SUBSCRIPTION tap_sub CONNECTION "
        "'{} application_name={}' PUBLICATION tap_pub "
        "WITH (streaming = on)"
    )
    subscriber.safe_psql(create_sub.format(conninfo, appname))

    subscriber.wait_for_subscription_sync(publisher, appname)
    assert counts() == "2|0", "check initial data was copied to subscriber"

    # Streamed transaction with DDL, DML and ROLLBACKs.
    streamed_txn = (
        "BEGIN;\n"
        "INSERT INTO test_tab VALUES (3, sha256(3::text::bytea));\n"
        "ALTER TABLE test_tab ADD COLUMN c INT;\n"
        "SAVEPOINT s1;\n"
        "INSERT INTO test_tab VALUES (4, sha256(4::text::bytea), -4);\n"
        "ALTER TABLE test_tab ADD COLUMN d INT;\n"
        "SAVEPOINT s2;\n"
        "INSERT INTO test_tab VALUES (5, sha256(5::text::bytea), -5, 5*2);\n"
        "ALTER TABLE test_tab ADD COLUMN e INT;\n"
        "SAVEPOINT s3;\n"
        "INSERT INTO test_tab VALUES (6, sha256(6::text::bytea), -6, 6*2, -6*3);\n"
        "ALTER TABLE test_tab DROP COLUMN c;\n"
        "ROLLBACK TO s1;\n"
        "INSERT INTO test_tab VALUES (4, sha256(4::text::bytea), 4);\n"
        "COMMIT;\n"
    )
    publisher.safe_psql(streamed_txn)

    publisher.wait_for_catchup(appname)
    assert (
        counts() == "4|1"
    ), "rollback to savepoint reflected; extra columns get local defaults"
+""" + +_PEEK = """ +SELECT get_byte(data, 0) +FROM pg_logical_slot_peek_binary_changes('tap_sub', NULL, NULL, + 'proto_version', '1', 'publication_names', 'tap_pub', 'messages', 'true') +""" + + +def test_messages(create_pg): + """pg_logical_emit_message changes appear (or not) on the slot as expected.""" + publisher = create_pg("publisher", allows_streaming="logical", start=False) + publisher.append_conf("autovacuum = off") + publisher.start() + subscriber = create_pg("subscriber") + + publisher.safe_psql("CREATE TABLE tab_test (a int primary key)") + subscriber.safe_psql("CREATE TABLE tab_test (a int primary key)") + + connstr = publisher.connstr() + " dbname=postgres" + publisher.safe_psql("CREATE PUBLICATION tap_pub FOR TABLE tab_test") + subscriber.safe_psql( + "CREATE SUBSCRIPTION tap_sub CONNECTION '{}' PUBLICATION tap_pub".format( + connstr + ) + ) + publisher.wait_for_catchup("tap_sub") + + # Disable the subscription and wait for the slot to go inactive. + subscriber.safe_psql("ALTER SUBSCRIPTION tap_sub DISABLE") + assert publisher.poll_query_until( + "SELECT COUNT(*) FROM pg_catalog.pg_replication_slots " + "WHERE slot_name = 'tap_sub' AND active='f'", + expected="1", + ) + + publisher.safe_psql( + "SELECT pg_logical_emit_message(true, 'pgoutput', 'a transactional message')" + ) + + # 66 77 67 == B M C == BEGIN MESSAGE COMMIT + assert ( + publisher.safe_psql(_PEEK) == "66\n77\n67" + ), "messages on slot are B M C with message option" + + result = publisher.safe_psql( + """ + SELECT get_byte(data, 1), encode(substr(data, 11, 8), 'escape') + FROM pg_logical_slot_peek_binary_changes('tap_sub', NULL, NULL, + 'proto_version', '1', 'publication_names', 'tap_pub', + 'messages', 'true') + OFFSET 1 LIMIT 1 + """ + ) + assert ( + result == "1|pgoutput" + ), "flag transactional is set to 1 and prefix is pgoutput" + + # Without the messages option, the empty transaction is optimized away. 
+ result = publisher.safe_psql( + """ + SELECT get_byte(data, 0) + FROM pg_logical_slot_get_binary_changes('tap_sub', NULL, NULL, + 'proto_version', '1', 'publication_names', 'tap_pub') + """ + ) + assert result == "", "messages defaults to false so M is not available on slot" + + publisher.safe_psql("INSERT INTO tab_test VALUES (1)") + message_lsn = publisher.safe_psql( + "SELECT pg_logical_emit_message(false, 'pgoutput', " + "'a non-transactional message')" + ) + publisher.safe_psql("INSERT INTO tab_test VALUES (2)") + + result = publisher.safe_psql( + """ + SELECT get_byte(data, 0), get_byte(data, 1) + FROM pg_logical_slot_get_binary_changes('tap_sub', NULL, NULL, + 'proto_version', '1', 'publication_names', 'tap_pub', + 'messages', 'true') + WHERE lsn = '{}' AND xid = 0 + """.format( + message_lsn + ) + ) + assert result == "77|0", "non-transactional message on slot is M" + + # A non-transactional message emitted inside an aborted transaction still + # shows up once the LSN advances (forced via a WAL switch). 
+ publisher.safe_psql( + """ +BEGIN; +SELECT pg_logical_emit_message(false, 'pgoutput', +'a non-transactional message is available even if the transaction is aborted 1'); +INSERT INTO tab_test VALUES (3); +SELECT pg_logical_emit_message(true, 'pgoutput', +'a transactional message is not available if the transaction is aborted'); +SELECT pg_logical_emit_message(false, 'pgoutput', +'a non-transactional message is available even if the transaction is aborted 2'); +ROLLBACK; +SELECT pg_switch_wal(); +""" + ) + + result = publisher.safe_psql( + """ + SELECT get_byte(data, 0), get_byte(data, 1) + FROM pg_logical_slot_peek_binary_changes('tap_sub', NULL, NULL, + 'proto_version', '1', 'publication_names', 'tap_pub', + 'messages', 'true') + """ + ) + assert ( + result == "77|0\n77|0" + ), "non-transactional message on slot from aborted transaction is M" + + subscriber.stop("fast") + publisher.stop("fast") diff --git a/src/test/subscription/pyt/test_021_twophase.py b/src/test/subscription/pyt/test_021_twophase.py new file mode 100644 index 0000000000000..8f19bb8d8b6f4 --- /dev/null +++ b/src/test/subscription/pyt/test_021_twophase.py @@ -0,0 +1,245 @@ +# Copyright (c) 2021-2026, PostgreSQL Global Development Group + +"""Port of src/test/subscription/t/021_twophase.pl. + +Logical replication of two-phase commit (PREPARE/COMMIT PREPARED/ROLLBACK +PREPARED), including crash restarts and ALTER SUBSCRIPTION two_phase changes. 
+""" + +_APP = "tap_sub" +_APP_COPY = "appname_copy" +_TWOPHASE = ( + "SELECT count(1) = 0 FROM pg_subscription WHERE subtwophasestate NOT IN ('e');" +) +_NO_APPLY = ( + "SELECT count(*) = 0 FROM pg_stat_activity " + "WHERE backend_type = 'logical replication apply worker'" +) + + +def _prepared(node): + return node.safe_psql("SELECT count(*) FROM pg_prepared_xacts;") + + +def _setup(publisher, subscriber): + publisher.safe_psql("CREATE TABLE tab_full (a int PRIMARY KEY)") + publisher.safe_psql( + "BEGIN;\n" + "INSERT INTO tab_full SELECT generate_series(1,10);\n" + "PREPARE TRANSACTION 'some_initial_data';\n" + "COMMIT PREPARED 'some_initial_data';" + ) + subscriber.safe_psql("CREATE TABLE tab_full (a int PRIMARY KEY)") + + connstr = publisher.connstr() + " dbname=postgres" + publisher.safe_psql("CREATE PUBLICATION tap_pub FOR TABLE tab_full") + subscriber.safe_psql( + "CREATE SUBSCRIPTION tap_sub CONNECTION " + "'{} application_name={}' PUBLICATION tap_pub " + "WITH (two_phase = on)".format(connstr, _APP) + ) + subscriber.wait_for_subscription_sync(publisher, _APP) + assert subscriber.poll_query_until(_TWOPHASE), "twophase enabled" + return connstr + + +def _commit_then_rollback(publisher, subscriber): + # max_prepared_transactions = 0 on the subscriber makes apply fail first. + publisher.safe_psql( + "BEGIN;\nINSERT INTO tab_full VALUES (11);\n" + "PREPARE TRANSACTION 'test_prepared_tab_full';" + ) + subscriber.wait_for_log(r"ERROR: ( [A-Z0-9]+:)? 
prepared transactions are disabled") + subscriber.append_conf("max_prepared_transactions = 10") + subscriber.restart() + publisher.wait_for_catchup(_APP) + assert _prepared(subscriber) == "1", "transaction is prepared on subscriber" + + publisher.safe_psql("COMMIT PREPARED 'test_prepared_tab_full';") + publisher.wait_for_catchup(_APP) + assert ( + subscriber.safe_psql("SELECT count(*) FROM tab_full where a = 11;") == "1" + ), "Row inserted via 2PC has committed on subscriber" + assert _prepared(subscriber) == "0", "transaction is committed on subscriber" + + publisher.safe_psql( + "BEGIN;\nINSERT INTO tab_full VALUES (12);\n" + "PREPARE TRANSACTION 'test_prepared_tab_full';" + ) + publisher.wait_for_catchup(_APP) + assert _prepared(subscriber) == "1", "transaction is prepared on subscriber" + publisher.safe_psql("ROLLBACK PREPARED 'test_prepared_tab_full';") + publisher.wait_for_catchup(_APP) + assert ( + subscriber.safe_psql("SELECT count(*) FROM tab_full where a = 12;") == "0" + ), "Row inserted via 2PC is not present on subscriber" + assert _prepared(subscriber) == "0", "transaction is aborted on subscriber" + + +def _crash_restart(publisher, subscriber, vals, action, crash_pub, crash_sub): + publisher.safe_psql( + "BEGIN;\nINSERT INTO tab_full VALUES ({});\nINSERT INTO tab_full VALUES ({});\n" + "PREPARE TRANSACTION 'test_prepared_tab';".format(*vals) + ) + if crash_sub: + subscriber.stop("immediate") + if crash_pub: + publisher.stop("immediate") + if crash_pub: + publisher.start() + if crash_sub: + subscriber.start() + publisher.safe_psql("{} PREPARED 'test_prepared_tab';".format(action)) + publisher.wait_for_catchup(_APP) + expected = "2" if action == "COMMIT" else "0" + assert ( + subscriber.safe_psql( + "SELECT count(*) FROM tab_full where a IN ({},{});".format(*vals) + ) + == expected + ), "2PC {} decoded across crash restart".format(action) + + +def _nested_and_empty_gid(publisher, subscriber): + publisher.safe_psql( + "BEGIN;\nINSERT INTO tab_full VALUES 
(21);\nSAVEPOINT sp_inner;\n" + "INSERT INTO tab_full VALUES (22);\nROLLBACK TO SAVEPOINT sp_inner;\n" + "PREPARE TRANSACTION 'outer';" + ) + publisher.wait_for_catchup(_APP) + assert _prepared(subscriber) == "1", "transaction is prepared on subscriber" + publisher.safe_psql("COMMIT PREPARED 'outer';") + publisher.wait_for_catchup(_APP) + assert _prepared(subscriber) == "0", "transaction is ended on subscriber" + assert ( + subscriber.safe_psql("SELECT a FROM tab_full where a IN (21,22);") == "21" + ), "Rows committed are on the subscriber" + + publisher.safe_psql( + "BEGIN;\nINSERT INTO tab_full VALUES (51);\nPREPARE TRANSACTION '';" + ) + publisher.wait_for_catchup(_APP) + assert _prepared(subscriber) == "1", "transaction is prepared on subscriber" + publisher.safe_psql("ROLLBACK PREPARED '';") + publisher.wait_for_catchup(_APP) + assert _prepared(subscriber) == "0", "transaction is aborted on subscriber" + + +def _copy_data_false(publisher, subscriber, connstr): + publisher.safe_psql("CREATE TABLE tab_copy (a int PRIMARY KEY)") + publisher.safe_psql("INSERT INTO tab_copy SELECT generate_series(1,5);") + subscriber.safe_psql("CREATE TABLE tab_copy (a int PRIMARY KEY)") + subscriber.safe_psql("INSERT INTO tab_copy VALUES (88);") + assert subscriber.safe_psql("SELECT count(*) FROM tab_copy;") == "1", "initial data" + + publisher.safe_psql("CREATE PUBLICATION tap_pub_copy FOR TABLE tab_copy;") + subscriber.safe_psql( + "CREATE SUBSCRIPTION tap_sub_copy CONNECTION " + "'{} application_name={}' PUBLICATION tap_pub_copy " + "WITH (two_phase=on, copy_data=false);".format(connstr, _APP_COPY) + ) + subscriber.wait_for_subscription_sync(publisher, _APP_COPY) + assert subscriber.poll_query_until(_TWOPHASE), "twophase enabled" + assert subscriber.safe_psql("SELECT count(*) FROM tab_copy;") == "1", "no copy_data" + + publisher.safe_psql( + "BEGIN;\nINSERT INTO tab_copy VALUES (99);\nPREPARE TRANSACTION 'mygid';" + ) + publisher.wait_for_catchup(_APP_COPY) + 
publisher.wait_for_catchup(_APP) + assert _prepared(subscriber) == "2", "transaction prepared for both subscriptions" + publisher.safe_psql("COMMIT PREPARED 'mygid';") + assert ( + publisher.safe_psql("SELECT count(*) FROM tab_copy;") == "6" + ), "publisher inserted data" + publisher.wait_for_catchup(_APP_COPY) + publisher.wait_for_catchup(_APP) + assert _prepared(subscriber) == "0", "no prepared transactions on subscriber" + assert subscriber.safe_psql("SELECT count(*) FROM tab_copy;") == "2", "replicated" + subscriber.safe_psql("DROP SUBSCRIPTION tap_sub") + + +def _alter_two_phase(publisher, subscriber): + slot = ( + "SELECT two_phase FROM pg_replication_slots WHERE slot_name = 'tap_sub_copy';" + ) + assert publisher.safe_psql(slot) == "t", "two-phase is enabled" + + subscriber.safe_psql("ALTER SUBSCRIPTION tap_sub_copy DISABLE;") + subscriber.poll_query_until(_NO_APPLY) + subscriber.safe_psql( + "ALTER SUBSCRIPTION tap_sub_copy SET (two_phase = false);\n" + "ALTER SUBSCRIPTION tap_sub_copy ENABLE;" + ) + subscriber.wait_for_subscription_sync(publisher, _APP_COPY) + assert ( + subscriber.safe_psql( + "SELECT subtwophasestate FROM pg_subscription " + "WHERE subname = 'tap_sub_copy';" + ) + == "d" + ), "two-phase subscription option disabled" + assert publisher.safe_psql(slot) == "f", "two-phase slot option disabled" + + publisher.safe_psql( + "BEGIN;\nINSERT INTO tab_copy VALUES (100);\nPREPARE TRANSACTION 'newgid';" + ) + publisher.wait_for_catchup(_APP_COPY) + assert _prepared(subscriber) == "0", "no prepared transactions on subscriber" + + subscriber.safe_psql("ALTER SUBSCRIPTION tap_sub_copy DISABLE;") + subscriber.poll_query_until(_NO_APPLY) + subscriber.safe_psql( + "ALTER SUBSCRIPTION tap_sub_copy SET (two_phase = true, failover = true);\n" + "ALTER SUBSCRIPTION tap_sub_copy ENABLE;" + ) + publisher.safe_psql("COMMIT PREPARED 'newgid';") + publisher.wait_for_catchup(_APP_COPY) + assert subscriber.safe_psql("SELECT count(*) FROM tab_copy;") == "3", 
"replicated" + assert ( + subscriber.safe_psql( + "SELECT subtwophasestate FROM pg_subscription " + "WHERE subname = 'tap_sub_copy';" + ) + == "e" + ), "two-phase should be enabled" + subscriber.safe_psql("DROP SUBSCRIPTION tap_sub_copy;") + publisher.safe_psql("DROP PUBLICATION tap_pub_copy;") + + +def test_twophase(create_pg): + """Two-phase commit logical replication across commit/rollback and crashes.""" + publisher = create_pg("publisher", allows_streaming="logical", start=False) + publisher.append_conf("max_prepared_transactions = 10") + publisher.start() + subscriber = create_pg("subscriber", start=False) + subscriber.append_conf("max_prepared_transactions = 0") + subscriber.start() + + connstr = _setup(publisher, subscriber) + _commit_then_rollback(publisher, subscriber) + + _crash_restart(publisher, subscriber, (12, 13), "ROLLBACK", True, True) + _crash_restart(publisher, subscriber, (12, 13), "COMMIT", True, True) + _crash_restart(publisher, subscriber, (14, 15), "COMMIT", False, True) + _crash_restart(publisher, subscriber, (16, 17), "COMMIT", True, False) + + _nested_and_empty_gid(publisher, subscriber) + _copy_data_false(publisher, subscriber, connstr) + _alter_two_phase(publisher, subscriber) + + assert ( + subscriber.safe_psql("SELECT count(*) FROM pg_subscription") == "0" + ), "subscription dropped on subscriber" + assert ( + publisher.safe_psql("SELECT count(*) FROM pg_replication_slots") == "0" + ), "replication slot dropped on publisher" + assert ( + subscriber.safe_psql("SELECT count(*) FROM pg_subscription_rel") == "0" + ), "subscription relation status dropped on subscriber" + assert ( + subscriber.safe_psql("SELECT count(*) FROM pg_replication_origin") == "0" + ), "replication origin dropped on subscriber" + + subscriber.stop("fast") + publisher.stop("fast") diff --git a/src/test/subscription/pyt/test_022_twophase_cascade.py b/src/test/subscription/pyt/test_022_twophase_cascade.py new file mode 100644 index 0000000000000..cc274f9a31dc9 --- 
/dev/null +++ b/src/test/subscription/pyt/test_022_twophase_cascade.py @@ -0,0 +1,213 @@ +# Copyright (c) 2021-2026, PostgreSQL Global Development Group + +"""Port of src/test/subscription/t/022_twophase_cascade.pl. + +Cascading logical replication of 2PC (node_A -> node_B -> node_C), both +non-streaming and streaming. +""" + +_APP_B = "tap_sub_B" +_APP_C = "tap_sub_C" +_TWOPHASE = ( + "SELECT count(1) = 0 FROM pg_subscription WHERE subtwophasestate NOT IN ('e');" +) + + +def _prepared(node): + return node.safe_psql("SELECT count(*) FROM pg_prepared_xacts;") + + +def _cascade_catchup(node_a, node_b): + node_a.wait_for_catchup(_APP_B) + node_b.wait_for_catchup(_APP_C) + + +def _both_prepared(node_b, node_c, expected, msg): + assert _prepared(node_b) == expected, msg + " B" + assert _prepared(node_c) == expected, msg + " C" + + +def _setup(node_a, node_b, node_c): + for node in (node_a, node_b, node_c): + node.append_conf( + "max_prepared_transactions = 10\nlogical_decoding_work_mem = 64kB" + ) + node.start() + + node_a.safe_psql("CREATE TABLE tab_full (a int PRIMARY KEY)") + node_a.safe_psql("INSERT INTO tab_full SELECT generate_series(1,10);") + node_a.safe_psql("CREATE TABLE test_tab (a int primary key, b bytea)") + node_a.safe_psql("INSERT INTO test_tab VALUES (1, 'foo'), (2, 'bar')") + for node in (node_b, node_c): + node.safe_psql("CREATE TABLE tab_full (a int PRIMARY KEY)") + node.safe_psql( + "CREATE TABLE test_tab (a int primary key, b bytea, " + "c timestamptz DEFAULT now(), d bigint DEFAULT 999)" + ) + + a_connstr = node_a.connstr() + " dbname=postgres" + node_a.safe_psql("CREATE PUBLICATION tap_pub_A FOR TABLE tab_full, test_tab") + node_b.safe_psql( + "CREATE SUBSCRIPTION tap_sub_B CONNECTION " + "'{} application_name={}' PUBLICATION tap_pub_A " + "WITH (two_phase = on, streaming = off)".format(a_connstr, _APP_B) + ) + b_connstr = node_b.connstr() + " dbname=postgres" + node_b.safe_psql("CREATE PUBLICATION tap_pub_B FOR TABLE tab_full, test_tab") + 
node_c.safe_psql( + "CREATE SUBSCRIPTION tap_sub_C CONNECTION " + "'{} application_name={}' PUBLICATION tap_pub_B " + "WITH (two_phase = on, streaming = off)".format(b_connstr, _APP_C) + ) + + node_a.wait_for_catchup(_APP_B) + node_b.wait_for_catchup(_APP_C) + assert node_b.poll_query_until(_TWOPHASE), "twophase enabled on B" + assert node_c.poll_query_until(_TWOPHASE), "twophase enabled on C" + + +def _non_streaming(node_a, node_b, node_c): + node_a.safe_psql( + "BEGIN;\nINSERT INTO tab_full VALUES (11);\n" + "PREPARE TRANSACTION 'test_prepared_tab_full';" + ) + _cascade_catchup(node_a, node_b) + _both_prepared(node_b, node_c, "1", "transaction is prepared on subscriber") + node_a.safe_psql("COMMIT PREPARED 'test_prepared_tab_full';") + _cascade_catchup(node_a, node_b) + for node, who in ((node_b, "B"), (node_c, "C")): + assert node.safe_psql("SELECT count(*) FROM tab_full where a = 11;") == "1", ( + "Row inserted via 2PC has committed on subscriber " + who + ) + _both_prepared(node_b, node_c, "0", "transaction is committed on subscriber") + + node_a.safe_psql( + "BEGIN;\nINSERT INTO tab_full VALUES (12);\n" + "PREPARE TRANSACTION 'test_prepared_tab_full';" + ) + _cascade_catchup(node_a, node_b) + _both_prepared(node_b, node_c, "1", "transaction is prepared on subscriber") + node_a.safe_psql("ROLLBACK PREPARED 'test_prepared_tab_full';") + _cascade_catchup(node_a, node_b) + for node, who in ((node_b, "B"), (node_c, "C")): + assert node.safe_psql("SELECT count(*) FROM tab_full where a = 12;") == "0", ( + "Row inserted via 2PC is not present on subscriber " + who + ) + _both_prepared(node_b, node_c, "0", "transaction is ended on subscriber") + + # Nested transaction with savepoint rollback. 
+ node_a.safe_psql( + "BEGIN;\nINSERT INTO tab_full VALUES (21);\nSAVEPOINT sp_inner;\n" + "INSERT INTO tab_full VALUES (22);\nROLLBACK TO SAVEPOINT sp_inner;\n" + "PREPARE TRANSACTION 'outer';" + ) + _cascade_catchup(node_a, node_b) + _both_prepared(node_b, node_c, "1", "transaction is prepared on subscriber") + node_a.safe_psql("COMMIT PREPARED 'outer';") + _cascade_catchup(node_a, node_b) + _both_prepared(node_b, node_c, "0", "transaction is ended on subscriber") + for node, who in ((node_b, "B"), (node_c, "C")): + assert node.safe_psql("SELECT a FROM tab_full where a IN (21,22);") == "21", ( + "Rows committed are present on subscriber " + who + ) + + +def _enable_streaming(node_a, node_b, node_c): + oldpid_b = node_a.safe_psql( + "SELECT pid FROM pg_stat_replication WHERE application_name = '{}' " + "AND state = 'streaming';".format(_APP_B) + ) + oldpid_c = node_b.safe_psql( + "SELECT pid FROM pg_stat_replication WHERE application_name = '{}' " + "AND state = 'streaming';".format(_APP_C) + ) + node_b.safe_psql("ALTER SUBSCRIPTION tap_sub_B SET (streaming = on);") + node_c.safe_psql("ALTER SUBSCRIPTION tap_sub_C SET (streaming = on)") + assert node_a.poll_query_until( + "SELECT pid != {} FROM pg_stat_replication WHERE application_name = '{}' " + "AND state = 'streaming';".format(oldpid_b, _APP_B) + ), "apply restarted (B)" + assert node_b.poll_query_until( + "SELECT pid != {} FROM pg_stat_replication WHERE application_name = '{}' " + "AND state = 'streaming';".format(oldpid_c, _APP_C) + ), "apply restarted (C)" + + +def _streaming_2pc(node_a, node_b, node_c): + node_a.safe_psql( + "BEGIN;\n" + "INSERT INTO test_tab SELECT i, sha256(i::text::bytea) " + "FROM generate_series(3, 5000) s(i);\n" + "UPDATE test_tab SET b = sha256(b) WHERE mod(a,2) = 0;\n" + "DELETE FROM test_tab WHERE mod(a,3) = 0;\n" + "PREPARE TRANSACTION 'test_prepared_tab';" + ) + _cascade_catchup(node_a, node_b) + _both_prepared(node_b, node_c, "1", "transaction is prepared on subscriber") + 
node_a.safe_psql("COMMIT PREPARED 'test_prepared_tab';") + _cascade_catchup(node_a, node_b) + for node, who in ((node_b, "B"), (node_c, "C")): + assert ( + node.safe_psql("SELECT count(*), count(c), count(d = 999) FROM test_tab") + == "3334|3334|3334" + ), ("Rows inserted by 2PC committed on subscriber " + who) + _both_prepared(node_b, node_c, "0", "transaction is committed on subscriber") + + # Streamed 2PC with a nested ROLLBACK TO SAVEPOINT. + node_a.safe_psql("DELETE FROM test_tab WHERE a > 2;") + node_a.safe_psql( + "BEGIN;\nINSERT INTO test_tab VALUES (9999, 'foobar');\nSAVEPOINT sp_inner;\n" + "INSERT INTO test_tab SELECT i, sha256(i::text::bytea) " + "FROM generate_series(3, 5000) s(i);\n" + "UPDATE test_tab SET b = sha256(b) WHERE mod(a,2) = 0;\n" + "DELETE FROM test_tab WHERE mod(a,3) = 0;\n" + "ROLLBACK TO SAVEPOINT sp_inner;\n" + "PREPARE TRANSACTION 'outer';" + ) + _cascade_catchup(node_a, node_b) + _both_prepared(node_b, node_c, "1", "transaction is prepared on subscriber") + node_a.safe_psql("COMMIT PREPARED 'outer';") + _cascade_catchup(node_a, node_b) + _both_prepared(node_b, node_c, "0", "transaction is ended on subscriber") + for node, who in ((node_b, "B"), (node_c, "C")): + assert ( + node.safe_psql("SELECT count(*) FROM test_tab where b = 'foobar';") == "1" + ), ("Rows committed are present on subscriber " + who) + assert node.safe_psql("SELECT count(*) FROM test_tab;") == "3", ( + "Rows committed are present on subscriber " + who + ) + + +def _cleanup(node_a, node_b, node_c): + node_c.safe_psql("DROP SUBSCRIPTION tap_sub_C") + assert node_c.safe_psql("SELECT count(*) FROM pg_subscription") == "0", "C sub" + assert node_c.safe_psql("SELECT count(*) FROM pg_subscription_rel") == "0", "C rel" + assert node_c.safe_psql("SELECT count(*) FROM pg_replication_origin") == "0", "C ro" + assert ( + node_b.safe_psql("SELECT count(*) FROM pg_replication_slots") == "0" + ), "B slot" + + node_b.safe_psql("DROP SUBSCRIPTION tap_sub_B") + assert 
node_b.safe_psql("SELECT count(*) FROM pg_subscription") == "0", "B sub" + assert node_b.safe_psql("SELECT count(*) FROM pg_subscription_rel") == "0", "B rel" + assert node_b.safe_psql("SELECT count(*) FROM pg_replication_origin") == "0", "B ro" + assert ( + node_a.safe_psql("SELECT count(*) FROM pg_replication_slots") == "0" + ), "A slot" + + +def test_twophase_cascade(create_pg): + """Cascading 2PC logical replication, non-streaming and streaming.""" + node_a = create_pg("node_A", allows_streaming="logical", start=False) + node_b = create_pg("node_B", allows_streaming="logical", start=False) + node_c = create_pg("node_C", start=False) + + _setup(node_a, node_b, node_c) + _non_streaming(node_a, node_b, node_c) + _enable_streaming(node_a, node_b, node_c) + _streaming_2pc(node_a, node_b, node_c) + _cleanup(node_a, node_b, node_c) + + node_c.stop("fast") + node_b.stop("fast") + node_a.stop("fast") diff --git a/src/test/subscription/pyt/test_023_twophase_stream.py b/src/test/subscription/pyt/test_023_twophase_stream.py new file mode 100644 index 0000000000000..f49925e073ea4 --- /dev/null +++ b/src/test/subscription/pyt/test_023_twophase_stream.py @@ -0,0 +1,218 @@ +# Copyright (c) 2021-2026, PostgreSQL Global Development Group + +"""Port of src/test/subscription/t/023_twophase_stream.pl. + +Logical replication of 2PC with streaming (streaming=on and parallel), plus +serialize-to-file and parallel-apply retry on insufficient +max_prepared_transactions. 
+""" + +_APP = "tap_sub" +_TWOPHASE = ( + "SELECT count(1) = 0 FROM pg_subscription WHERE subtwophasestate NOT IN ('e');" +) +_PREPARE_BLOCK = ( + "BEGIN;\n" + "INSERT INTO test_tab SELECT i, sha256(i::text::bytea) " + "FROM generate_series(3, 5) s(i);\n" + "UPDATE test_tab SET b = sha256(b) WHERE mod(a,2) = 0;\n" + "DELETE FROM test_tab WHERE mod(a,3) = 0;\n" + "PREPARE TRANSACTION 'test_prepared_tab';" +) + + +def _prepared(node): + return node.safe_psql("SELECT count(*) FROM pg_prepared_xacts;") + + +def _agg(node): + return node.safe_psql("SELECT count(*), count(c), count(d = 999) FROM test_tab") + + +def _check_parallel(subscriber, offset, is_parallel): + if is_parallel: + subscriber.wait_for_log( + r"DEBUG: ( [A-Z0-9]+:)? finished processing the STREAM PREPARE command", + offset, + ) + + +def _test_streaming(publisher, subscriber, is_parallel): + # 2PC PREPARE / COMMIT PREPARED. + offset = subscriber.current_log_position() + publisher.safe_psql(_PREPARE_BLOCK) + publisher.wait_for_catchup(_APP) + _check_parallel(subscriber, offset, is_parallel) + assert _prepared(subscriber) == "1", "transaction is prepared on subscriber" + publisher.safe_psql("COMMIT PREPARED 'test_prepared_tab';") + publisher.wait_for_catchup(_APP) + assert _agg(subscriber) == "4|4|4", "2PC committed; extra columns local defaults" + assert _prepared(subscriber) == "0", "transaction is committed on subscriber" + + # 2PC PREPARE / ROLLBACK PREPARED. 
+ publisher.safe_psql("DELETE FROM test_tab WHERE a > 2;") + offset = subscriber.current_log_position() + publisher.safe_psql(_PREPARE_BLOCK) + publisher.wait_for_catchup(_APP) + _check_parallel(subscriber, offset, is_parallel) + assert _prepared(subscriber) == "1", "transaction is prepared on subscriber" + publisher.safe_psql("ROLLBACK PREPARED 'test_prepared_tab';") + publisher.wait_for_catchup(_APP) + assert _agg(subscriber) == "2|2|2", "2PC rolled back to the original 2 rows" + assert _prepared(subscriber) == "0", "transaction is aborted on subscriber" + + # COMMIT PREPARED decoded across a crash restart of both nodes. + offset = subscriber.current_log_position() + publisher.safe_psql(_PREPARE_BLOCK) + subscriber.stop("immediate") + publisher.stop("immediate") + publisher.start() + subscriber.start() + publisher.safe_psql("COMMIT PREPARED 'test_prepared_tab';") + publisher.wait_for_catchup(_APP) + assert _agg(subscriber) == "4|4|4", "2PC committed after crash restart" + + # INSERT after PREPARE, before ROLLBACK PREPARED. + publisher.safe_psql("DELETE FROM test_tab WHERE a > 2;") + offset = subscriber.current_log_position() + publisher.safe_psql(_PREPARE_BLOCK) + publisher.wait_for_catchup(_APP) + _check_parallel(subscriber, offset, is_parallel) + assert _prepared(subscriber) == "1", "transaction is prepared on subscriber" + publisher.safe_psql("INSERT INTO test_tab VALUES (99999, 'foobar')") + publisher.safe_psql("ROLLBACK PREPARED 'test_prepared_tab';") + publisher.wait_for_catchup(_APP) + assert _agg(subscriber) == "3|3|3", "the outside insert was copied to subscriber" + assert _prepared(subscriber) == "0", "transaction is aborted on subscriber" + + # INSERT after PREPARE, before COMMIT PREPARED. 
+ publisher.safe_psql("DELETE FROM test_tab WHERE a > 2;") + offset = subscriber.current_log_position() + publisher.safe_psql(_PREPARE_BLOCK) + publisher.wait_for_catchup(_APP) + _check_parallel(subscriber, offset, is_parallel) + assert _prepared(subscriber) == "1", "transaction is prepared on subscriber" + publisher.safe_psql("INSERT INTO test_tab VALUES (99999, 'foobar')") + publisher.safe_psql("COMMIT PREPARED 'test_prepared_tab';") + publisher.wait_for_catchup(_APP) + assert _agg(subscriber) == "5|5|5", "2PC plus outside insert committed" + assert _prepared(subscriber) == "0", "transaction is committed on subscriber" + + publisher.safe_psql("DELETE FROM test_tab WHERE a > 2;") + publisher.wait_for_catchup(_APP) + + +def _test_serialize_and_retry(publisher, subscriber): + """Serialize-to-file path and parallel-apply retry on max_prepared=0.""" + subscriber.append_conf("debug_logical_replication_streaming = immediate") + subscriber.append_conf("log_min_messages = warning") + subscriber.reload() + subscriber.safe_psql("SELECT 1") + + offset = subscriber.current_log_position() + publisher.safe_psql( + "BEGIN;\nINSERT INTO test_tab_2 values(1);\nPREPARE TRANSACTION 'xact';" + ) + subscriber.wait_for_log( + r"LOG: ( [A-Z0-9]+:)? logical replication apply worker will serialize the " + r"remaining changes of remote transaction \d+ to a file", + offset, + ) + publisher.wait_for_catchup(_APP) + assert _prepared(subscriber) == "1", "transaction is prepared on subscriber" + publisher.safe_psql("COMMIT PREPARED 'xact';") + publisher.wait_for_catchup(_APP) + assert ( + subscriber.safe_psql("SELECT count(*) FROM test_tab_2") == "1" + ), "transaction is committed on subscriber" + + # Parallel apply worker fails to PREPARE when max_prepared_transactions = 0, + # then the transaction is re-applied after the setting is fixed. 
+ subscriber.append_conf( + "max_prepared_transactions = 0\ndebug_logical_replication_streaming = buffered" + ) + subscriber.restart() + publisher.safe_psql( + "BEGIN;\nINSERT INTO test_tab_2 values(2);\n" + "PREPARE TRANSACTION 'xact';\nCOMMIT PREPARED 'xact';" + ) + offset = subscriber.current_log_position() + subscriber.wait_for_log( + r"ERROR: ( [A-Z0-9]+:)? prepared transactions are disabled", offset + ) + subscriber.wait_for_log( + r"ERROR: .*logical replication parallel apply worker.*", offset + ) + subscriber.append_conf("max_prepared_transactions = 10") + subscriber.restart() + publisher.wait_for_catchup(_APP) + assert ( + subscriber.safe_psql("SELECT count(*) FROM test_tab_2") == "2" + ), "transaction is committed on subscriber after retrying" + + +def test_twophase_stream(create_pg): + """Streamed 2PC across streaming=on and parallel, serialize, and retry.""" + publisher = create_pg("publisher", allows_streaming="logical", start=False) + publisher.append_conf( + "max_prepared_transactions = 10\n" + "debug_logical_replication_streaming = immediate" + ) + publisher.start() + subscriber = create_pg("subscriber", start=False) + subscriber.append_conf("max_prepared_transactions = 10") + subscriber.start() + + publisher.safe_psql("CREATE TABLE test_tab (a int primary key, b bytea)") + publisher.safe_psql("INSERT INTO test_tab VALUES (1, 'foo'), (2, 'bar')") + publisher.safe_psql("CREATE TABLE test_tab_2 (a int)") + subscriber.safe_psql( + "CREATE TABLE test_tab (a int primary key, b bytea, " + "c timestamptz DEFAULT now(), d bigint DEFAULT 999)" + ) + subscriber.safe_psql("CREATE TABLE test_tab_2 (a int)") + + connstr = publisher.connstr() + " dbname=postgres" + publisher.safe_psql("CREATE PUBLICATION tap_pub FOR TABLE test_tab, test_tab_2") + subscriber.safe_psql( + "CREATE SUBSCRIPTION tap_sub CONNECTION " + "'{} application_name={}' PUBLICATION tap_pub " + "WITH (streaming = on, two_phase = on)".format(connstr, _APP) + ) + 
subscriber.wait_for_subscription_sync(publisher, _APP) + assert subscriber.poll_query_until(_TWOPHASE), "twophase enabled" + assert _agg(subscriber) == "2|2|2", "check initial data was copied to subscriber" + + _test_streaming(publisher, subscriber, False) + + # Switch to streaming = parallel. + oldpid = publisher.safe_psql( + "SELECT pid FROM pg_stat_replication WHERE application_name = '{}' " + "AND state = 'streaming';".format(_APP) + ) + subscriber.safe_psql("ALTER SUBSCRIPTION tap_sub SET(streaming = parallel)") + assert publisher.poll_query_until( + "SELECT pid != {} FROM pg_stat_replication WHERE application_name = '{}' " + "AND state = 'streaming';".format(oldpid, _APP) + ), "apply restarted after changing SUBSCRIPTION" + subscriber.append_conf("log_min_messages = debug1") + subscriber.reload() + subscriber.safe_psql("SELECT 1") + + _test_streaming(publisher, subscriber, True) + _test_serialize_and_retry(publisher, subscriber) + + subscriber.safe_psql("DROP SUBSCRIPTION tap_sub") + assert subscriber.safe_psql("SELECT count(*) FROM pg_subscription") == "0", "sub" + assert ( + publisher.safe_psql("SELECT count(*) FROM pg_replication_slots") == "0" + ), "slot dropped on publisher" + assert ( + subscriber.safe_psql("SELECT count(*) FROM pg_subscription_rel") == "0" + ), "subscription relation status dropped" + assert ( + subscriber.safe_psql("SELECT count(*) FROM pg_replication_origin") == "0" + ), "replication origin dropped" + + subscriber.stop("fast") + publisher.stop("fast") diff --git a/src/test/subscription/pyt/test_024_add_drop_pub.py b/src/test/subscription/pyt/test_024_add_drop_pub.py new file mode 100644 index 0000000000000..4377949fb4eb2 --- /dev/null +++ b/src/test/subscription/pyt/test_024_add_drop_pub.py @@ -0,0 +1,81 @@ +# Copyright (c) 2021-2026, PostgreSQL Global Development Group + +"""Port of src/test/subscription/t/024_add_drop_pub.pl. + +ALTER SUBSCRIPTION ... 
ADD/DROP/SET PUBLICATION, and that creating a missing
+publication later does not break logical replication.
+"""
+
+
+def test_add_drop_pub(create_pg):
+    """ADD/DROP/SET PUBLICATION and recovery after a missing publication.
+
+    Creates a publisher and a subscriber through the ``create_pg`` fixture,
+    subscribes to tap_pub_1 and tap_pub_2, then changes the subscription's
+    publication list (DROP, ADD, SET) and checks the subscriber's tables after
+    each step.  Finally the subscription is pointed at tap_pub_3 before that
+    publication exists; replication must work once the publication is created
+    and the subscription is refreshed.
+    """
+    publisher = create_pg("publisher", allows_streaming="logical")
+    subscriber = create_pg("subscriber")
+
+    publisher.safe_psql("CREATE TABLE tab_1 (a int)")
+    publisher.safe_psql("INSERT INTO tab_1 SELECT generate_series(1,10)")
+    subscriber.safe_psql("CREATE TABLE tab_1 (a int)")
+
+    connstr = publisher.connstr() + " dbname=postgres"
+    publisher.safe_psql("CREATE PUBLICATION tap_pub_1 FOR TABLE tab_1")
+    # tap_pub_2 starts out empty; tab_2 is added to it further down.
+    publisher.safe_psql("CREATE PUBLICATION tap_pub_2")
+    subscriber.safe_psql(
+        "CREATE SUBSCRIPTION tap_sub CONNECTION '{}' "
+        "PUBLICATION tap_pub_1, tap_pub_2".format(connstr)
+    )
+
+    subscriber.wait_for_subscription_sync(publisher, "tap_sub")
+    assert (
+        subscriber.safe_psql("SELECT count(*), min(a), max(a) FROM tab_1") == "10|1|10"
+    ), "check initial data is copied to subscriber"
+
+    publisher.safe_psql("CREATE TABLE tab_2 (a int)")
+    publisher.safe_psql("INSERT INTO tab_2 SELECT generate_series(1,10)")
+    subscriber.safe_psql("CREATE TABLE tab_2 (a int)")
+    publisher.safe_psql("ALTER PUBLICATION tap_pub_2 ADD TABLE tab_2")
+
+    # Dropping tap_pub_1 refreshes the entire publication list.
+    subscriber.safe_psql("ALTER SUBSCRIPTION tap_sub DROP PUBLICATION tap_pub_1")
+    subscriber.wait_for_subscription_sync(publisher, "tap_sub")
+    assert (
+        subscriber.safe_psql("SELECT count(*), min(a), max(a) FROM tab_2") == "10|1|10"
+    ), "check initial data is copied to subscriber"
+
+    # Re-adding tap_pub_1 refreshes the entire publication list.
+    subscriber.safe_psql("ALTER SUBSCRIPTION tap_sub ADD PUBLICATION tap_pub_1")
+    subscriber.wait_for_subscription_sync(publisher, "tap_sub")
+    # 20 rows still spanning 1..10 are expected: presumably tab_1 is copied a
+    # second time on top of the rows already present -- TODO confirm.
+    assert (
+        subscriber.safe_psql("SELECT count(*), min(a), max(a) FROM tab_1") == "20|1|10"
+    ), "check initial data is copied to subscriber"
+
+    # A missing publication should log a warning but not disrupt replication.
+    publisher.safe_psql("CREATE TABLE tab_3 (a int)")
+    subscriber.safe_psql("CREATE TABLE tab_3 (a int)")
+    # Remember the pid of the current streaming connection; a different pid in
+    # the poll below shows that the connection was re-established.
+    oldpid = publisher.safe_psql(
+        "SELECT pid FROM pg_stat_replication "
+        "WHERE application_name = 'tap_sub' AND state = 'streaming';"
+    )
+    subscriber.safe_psql("ALTER SUBSCRIPTION tap_sub SET PUBLICATION tap_pub_3")
+    assert publisher.poll_query_until(
+        "SELECT pid != {} FROM pg_stat_replication "
+        "WHERE application_name = 'tap_sub' AND state = 'streaming';".format(oldpid)
+    ), "apply worker to restart after altering the subscription"
+
+    # Only look at publisher log output written after this point.
+    offset = publisher.current_log_position()
+    publisher.safe_psql("INSERT INTO tab_3 values(1)")
+    publisher.wait_for_log(
+        r'WARNING: ( [A-Z0-9]+:)? skipped loading publication "tap_pub_3"', offset
+    )
+
+    publisher.safe_psql("CREATE PUBLICATION tap_pub_3 FOR TABLE tab_3")
+    subscriber.safe_psql("ALTER SUBSCRIPTION tap_sub REFRESH PUBLICATION")
+    subscriber.wait_for_subscription_sync(publisher, "tap_sub")
+
+    publisher.safe_psql("INSERT INTO tab_3 values(2)")
+    publisher.wait_for_catchup("tap_sub")
+    assert (
+        subscriber.safe_psql("SELECT * FROM tab_3") == "1\n2"
+    ), "incremental data replicated after the publication is created"
+
+    subscriber.stop("fast")
+    publisher.stop("fast")
diff --git a/src/test/subscription/pyt/test_025_rep_changes_for_schema.py b/src/test/subscription/pyt/test_025_rep_changes_for_schema.py
new file mode 100644
index 0000000000000..37b98a9503e4e
--- /dev/null
+++ b/src/test/subscription/pyt/test_025_rep_changes_for_schema.py
@@ -0,0 +1,136 @@
+# Copyright (c) 2021-2026, PostgreSQL Global Development Group
+
+"""Port of src/test/subscription/t/025_rep_changes_for_schema.pl.
+
+Logical replication with FOR TABLES IN SCHEMA publications.
+"""
+
+
+def _rel_count(subscriber):
+    """Return the number of pg_subscription_rel rows of 'tap_sub_schema'.
+
+    The value is whatever ``subscriber.safe_psql`` returns for the count
+    query; callers compare it against strings such as "5".
+    """
+    return subscriber.safe_psql(
+        "SELECT count(*) FROM pg_subscription_rel WHERE srsubid IN "
+        "(SELECT oid FROM pg_subscription WHERE subname = 'tap_sub_schema')"
+    )
+
+
+def test_rep_changes_for_schema(create_pg):
+    """FOR TABLES IN SCHEMA: initial sync, refresh, schema moves, drops.
+
+    Publishes every table in schema sch1, subscribes to that publication and
+    then checks, step by step, what reaches the subscriber when rows are
+    inserted, a table is added to the schema, a table is moved out of it, a
+    table is dropped, and the schema is removed from the publication.
+    """
+    publisher = create_pg("publisher", allows_streaming="logical")
+    subscriber = create_pg("subscriber")
+    connstr = publisher.connstr() + " dbname=postgres"
+
+    publisher.safe_psql("CREATE SCHEMA sch1")
+    publisher.safe_psql("CREATE TABLE sch1.tab1 AS SELECT generate_series(1,10) AS a")
+    publisher.safe_psql("CREATE TABLE sch1.tab2 AS SELECT generate_series(1,10) AS a")
+    # The partitioned parent lives in sch1 while its partitions are created in
+    # schema public.
+    publisher.safe_psql(
+        "CREATE TABLE sch1.tab1_parent (a int PRIMARY KEY, b text) "
+        "PARTITION BY LIST (a)"
+    )
+    publisher.safe_psql(
+        "CREATE TABLE public.tab1_child1 PARTITION OF sch1.tab1_parent "
+        "FOR VALUES IN (1, 2, 3)"
+    )
+    publisher.safe_psql(
+        "CREATE TABLE public.tab1_child2 PARTITION OF sch1.tab1_parent "
+        "FOR VALUES IN (4, 5, 6)"
+    )
+    publisher.safe_psql("INSERT INTO sch1.tab1_parent values (1),(4)")
+
+    # Mirror the same table layout (without data) on the subscriber.
+    subscriber.safe_psql("CREATE SCHEMA sch1")
+    subscriber.safe_psql("CREATE TABLE sch1.tab1 (a int)")
+    subscriber.safe_psql("CREATE TABLE sch1.tab2 (a int)")
+    subscriber.safe_psql(
+        "CREATE TABLE sch1.tab1_parent (a int PRIMARY KEY, b text) "
+        "PARTITION BY LIST (a)"
+    )
+    subscriber.safe_psql(
+        "CREATE TABLE public.tab1_child1 PARTITION OF sch1.tab1_parent "
+        "FOR VALUES IN (1, 2, 3)"
+    )
+    subscriber.safe_psql(
+        "CREATE TABLE public.tab1_child2 PARTITION OF sch1.tab1_parent "
+        "FOR VALUES IN (4, 5, 6)"
+    )
+
+    publisher.safe_psql("CREATE PUBLICATION tap_pub_schema FOR TABLES IN SCHEMA sch1")
+    subscriber.safe_psql(
+        "CREATE SUBSCRIPTION tap_sub_schema CONNECTION '{}' "
+        "PUBLICATION tap_pub_schema".format(connstr)
+    )
+    subscriber.wait_for_subscription_sync(publisher, "tap_sub_schema")
+
+    assert (
+        subscriber.safe_psql("SELECT count(*), min(a), max(a) FROM sch1.tab1")
+        == "10|1|10"
+    ), "check rows on subscriber catchup"
+    assert (
+        subscriber.safe_psql("SELECT count(*), min(a), max(a) FROM sch1.tab2")
+        == "10|1|10"
+    ), "check rows on subscriber catchup"
+    assert (
+        subscriber.safe_psql("SELECT * FROM sch1.tab1_parent order by 1") == "1|\n4|"
+    ), "check rows on subscriber catchup"
+
+    publisher.safe_psql("INSERT INTO sch1.tab1 VALUES(generate_series(11,20))")
+    publisher.safe_psql("INSERT INTO sch1.tab1_parent values (2),(5)")
+    publisher.wait_for_catchup("tap_sub_schema")
+    assert (
+        subscriber.safe_psql("SELECT count(*), min(a), max(a) FROM sch1.tab1")
+        == "20|1|20"
+    ), "check replicated inserts on subscriber"
+    assert (
+        subscriber.safe_psql("SELECT * FROM sch1.tab1_parent order by 1")
+        == "1|\n2|\n4|\n5|"
+    ), "check replicated inserts on subscriber"
+
+    # A new table in the schema is not synced until the publication is refreshed.
+    publisher.safe_psql("CREATE TABLE sch1.tab3 AS SELECT generate_series(1,10) AS a")
+    subscriber.safe_psql("CREATE TABLE sch1.tab3(a int)")
+    publisher.wait_for_catchup("tap_sub_schema")
+    assert subscriber.safe_psql("SELECT count(*) FROM sch1.tab3") == "0"
+
+    subscriber.safe_psql("ALTER SUBSCRIPTION tap_sub_schema REFRESH PUBLICATION")
+    # NOTE(review): called without arguments here, unlike the
+    # (publisher, "tap_sub_schema") form used above -- confirm the fixture
+    # supports both call styles.
+    subscriber.wait_for_subscription_sync()
+    publisher.safe_psql("INSERT INTO sch1.tab3 VALUES(11)")
+    publisher.wait_for_catchup("tap_sub_schema")
+    assert (
+        subscriber.safe_psql("SELECT count(*), min(a), max(a) FROM sch1.tab3")
+        == "11|1|11"
+    ), "check rows on subscriber catchup"
+
+    # Moving a table out of the schema stops its replication.
+    publisher.safe_psql("ALTER TABLE sch1.tab3 SET SCHEMA public")
+    publisher.safe_psql("INSERT INTO public.tab3 VALUES(12)")
+    publisher.wait_for_catchup("tap_sub_schema")
+    # The subscriber still has only the 11 rows received before the move.
+    assert (
+        subscriber.safe_psql("SELECT count(*), min(a), max(a) FROM sch1.tab3")
+        == "11|1|11"
+    ), "check replicated inserts on subscriber"
+
+    # The relation count only shrinks once the subscription is refreshed.
+    assert _rel_count(subscriber) == "5", "relation status not yet dropped"
+    subscriber.safe_psql("ALTER SUBSCRIPTION tap_sub_schema REFRESH PUBLICATION")
+    subscriber.wait_for_subscription_sync()
+    assert _rel_count(subscriber) == "4", "relation status was dropped"
+
+    # Dropping a table removes it from pg_subscription_rel after refresh.
+    publisher.safe_psql("DROP TABLE sch1.tab2")
+    publisher.wait_for_catchup("tap_sub_schema")
+    assert _rel_count(subscriber) == "4", "relation status not yet dropped"
+    subscriber.safe_psql("ALTER SUBSCRIPTION tap_sub_schema REFRESH PUBLICATION")
+    subscriber.wait_for_subscription_sync()
+    assert _rel_count(subscriber) == "3", "relation status was dropped"
+
+    # Dropping the schema from the publication stops publishing (2nd insert).
+    publisher.safe_psql(
+        "INSERT INTO sch1.tab1 VALUES(21);\n"
+        "ALTER PUBLICATION tap_pub_schema DROP TABLES IN SCHEMA sch1;\n"
+        "INSERT INTO sch1.tab1 values(22);"
+    )
+    publisher.wait_for_catchup("tap_sub_schema")
+    # Only the first insert (21) is expected on the subscriber; 22 was inserted
+    # after the schema had been removed from the publication.
+    assert (
+        subscriber.safe_psql("SELECT count(*), min(a), max(a) FROM sch1.tab1")
+        == "21|1|21"
+    ), "check replicated inserts on subscriber"
+
+    subscriber.stop("fast")
+    publisher.stop("fast")
diff --git a/src/test/subscription/pyt/test_026_stats.py b/src/test/subscription/pyt/test_026_stats.py
new file mode 100644
index 0000000000000..f12a8366f456a
--- /dev/null
+++ b/src/test/subscription/pyt/test_026_stats.py
@@ -0,0 +1,222 @@
+# Copyright (c) 2021-2026, PostgreSQL Global Development Group
+
+"""Port of src/test/subscription/t/026_stats.pl.
+
+Subscription statistics (errors, conflicts, and resets).
+"""
+
+# Database every statement in this module is run against.
+_DB = "postgres"
+# Query templates ({} = subscription name) returning six boolean columns.
+# _ALL_NONZERO: every error/conflict counter is > 0 and stats_reset IS NULL.
+_ALL_NONZERO = (
+    "SELECT apply_error_count > 0, sync_seq_error_count > 0, "
+    "sync_table_error_count > 0, confl_insert_exists > 0, "
+    "confl_delete_missing > 0, stats_reset IS NULL "
+    "FROM pg_stat_subscription_stats WHERE subname = '{}'"
+)
+# _ALL_ZERO: every error/conflict counter is 0 and stats_reset IS NOT NULL.
+_ALL_ZERO = (
+    "SELECT apply_error_count = 0, sync_seq_error_count = 0, "
+    "sync_table_error_count = 0, confl_insert_exists = 0, "
+    "confl_delete_missing = 0, stats_reset IS NOT NULL "
+    "FROM pg_stat_subscription_stats WHERE subname = '{}'"
+)
+# safe_psql output expected when all six boolean columns are true.
+_SIX_T = "t|t|t|t|t|t"
+
+
+def _create_sub_pub_w_errors(publisher, subscriber, table_name, sequence_name):
+    """Set up a sub/pub that hits sync, sequencesync, apply and conflict errors.
+
+    Creates ``table_name`` and ``sequence_name`` on both nodes, a table
+    publication ``<table_name>_pub`` and an all-sequences publication
+    ``<sequence_name>_pub`` on the publisher, and a subscription
+    ``<table_name>_sub`` to both.  Each phase polls
+    pg_stat_subscription_stats / pg_subscription_rel until the expected
+    counter or state shows up.
+
+    Returns the tuple ``(pub_name, sub_name)``.
+    """
+    publisher.safe_psql(
+        "BEGIN;\n"
+        "CREATE TABLE {0}(a int);\n"
+        "ALTER TABLE {0} REPLICA IDENTITY FULL;\n"
+        "INSERT INTO {0} VALUES (1);\n"
+        "CREATE SEQUENCE {1};\n"
+        "COMMIT;".format(table_name, sequence_name),
+        dbname=_DB,
+    )
+    # Unlike the publisher, the subscriber's table has a primary key and
+    # already holds row 1, and its sequence uses INCREMENT BY 10 -- presumably
+    # these mismatches are what make the table copy and the sequence sync fail
+    # (TODO confirm).
+    subscriber.safe_psql(
+        "BEGIN;\n"
+        "CREATE TABLE {0}(a int primary key);\n"
+        "INSERT INTO {0} VALUES (1);\n"
+        "CREATE SEQUENCE {1} INCREMENT BY 10;\n"
+        "COMMIT;".format(table_name, sequence_name),
+        dbname=_DB,
+    )
+
+    pub_name = table_name + "_pub"
+    pub_seq_name = sequence_name + "_pub"
+    connstr = publisher.connstr() + " dbname={}".format(_DB)
+    publisher.safe_psql(
+        "CREATE PUBLICATION {} FOR TABLE {};\n"
+        "CREATE PUBLICATION {} FOR ALL SEQUENCES;".format(
+            pub_name, table_name, pub_seq_name
+        ),
+        dbname=_DB,
+    )
+    sub_name = table_name + "_sub"
+    subscriber.safe_psql(
+        "CREATE SUBSCRIPTION {} CONNECTION '{}' PUBLICATION {}, {}".format(
+            sub_name, connstr, pub_name, pub_seq_name
+        ),
+        dbname=_DB,
+    )
+    publisher.wait_for_catchup(sub_name)
+
+    assert subscriber.poll_query_until(
+        "SELECT count(1) = 1 FROM pg_stat_subscription_stats WHERE subname = '{}' "
+        "AND sync_seq_error_count > 0 AND sync_table_error_count > 0".format(sub_name),
+        dbname=_DB,
+    ), "sequencesync and tablesync errors reported"
+
+    # Align the subscriber's sequence definition, then wait for state 'r'.
+    subscriber.safe_psql(
+        "ALTER SEQUENCE {} INCREMENT 1".format(sequence_name), dbname=_DB
+    )
+    assert subscriber.poll_query_until(
+        "SELECT count(1) = 1 FROM pg_subscription_rel "
+        "WHERE srrelid = '{}'::regclass AND srsubstate = 'r'".format(sequence_name),
+        dbname=_DB,
+    ), "sequencesync finished"
+
+    # Remove the pre-existing row on the subscriber, then wait for the table
+    # to reach state 'r' or 's'.
+    subscriber.safe_psql("TRUNCATE {}".format(table_name), dbname=_DB)
+    assert subscriber.poll_query_until(
+        "SELECT count(1) = 1 FROM pg_subscription_rel "
+        "WHERE srrelid = '{}'::regclass AND srsubstate in ('r', 's')".format(
+            table_name
+        ),
+        dbname=_DB,
+    ), "tablesync finished"
+    assert (
+        subscriber.safe_psql("SELECT a FROM {}".format(table_name), dbname=_DB) == "1"
+    ), "table now has 1 row"
+
+    # A second row with value 1 collides with the subscriber's primary key.
+    publisher.safe_psql("INSERT INTO {} VALUES (1)".format(table_name), dbname=_DB)
+    assert subscriber.poll_query_until(
+        "SELECT apply_error_count > 0 AND confl_insert_exists > 0 "
+        "FROM pg_stat_subscription_stats WHERE subname = '{}'".format(sub_name),
+        dbname=_DB,
+    ), "apply error and insert_exists conflict reported"
+
+    # Empty the subscriber table first so the publisher's DELETE has no row to
+    # remove there.
+    subscriber.safe_psql("TRUNCATE {}".format(table_name), dbname=_DB)
+    publisher.safe_psql("DELETE FROM {};".format(table_name), dbname=_DB)
+    assert subscriber.poll_query_until(
+        "SELECT confl_delete_missing > 0 FROM pg_stat_subscription_stats "
+        "WHERE subname = '{}'".format(sub_name),
+        dbname=_DB,
+    ), "delete_missing conflict reported"
+    return pub_name, sub_name
+
+
+def _reset_one(subscriber, sub_name):
+    """Call pg_stat_reset_subscription_stats() for the subscription *sub_name*."""
+    subscriber.safe_psql(
+        "SELECT pg_stat_reset_subscription_stats((SELECT subid FROM "
+        "pg_stat_subscription_stats WHERE subname = '{}'))".format(sub_name),
+        dbname=_DB,
+    )
+
+
+def _reset_time(subscriber, sub_name):
+    """Return the stats_reset value of *sub_name* as returned by safe_psql."""
+    return subscriber.safe_psql(
+        "SELECT stats_reset FROM pg_stat_subscription_stats "
+        "WHERE subname = '{}'".format(sub_name),
+        dbname=_DB,
+    )
+
+
+def test_stats(create_pg):
+    """Subscription stat counters, conflicts, per-sub and global resets.
+
+    Builds two failing subscriptions with ``_create_sub_pub_w_errors``, checks
+    their counters before and after per-subscription and global resets, checks
+    that stats_reset advances on repeated resets, and finally checks that the
+    stats entries disappear when the subscriptions are dropped.
+    """
+    publisher = create_pg("publisher", allows_streaming="logical")
+    subscriber = create_pg("subscriber")
+
+    assert (
+        subscriber.safe_psql(
+            "SELECT count(1) FROM pg_stat_subscription_stats", dbname=_DB
+        )
+        == "0"
+    ), "no subscription errors before logical replication"
+
+    _, sub1 = _create_sub_pub_w_errors(publisher, subscriber, "test_tab1", "test_seq1")
+    assert (
+        subscriber.safe_psql(_ALL_NONZERO.format(sub1), dbname=_DB) == _SIX_T
+    ), "errors/conflicts > 0 and stats_reset NULL for {}".format(sub1)
+
+    _reset_one(subscriber, sub1)
+    assert (
+        subscriber.safe_psql(_ALL_ZERO.format(sub1), dbname=_DB) == _SIX_T
+    ), "errors/conflicts 0 and stats_reset not NULL after reset for {}".format(sub1)
+
+    # A second reset must move stats_reset forward.
+    reset_time1 = _reset_time(subscriber, sub1)
+    _reset_one(subscriber, sub1)
+    assert (
+        subscriber.safe_psql(
+            "SELECT stats_reset > '{}'::timestamptz FROM pg_stat_subscription_stats "
+            "WHERE subname = '{}'".format(reset_time1, sub1),
+            dbname=_DB,
+        )
+        == "t"
+    ), "reset timestamp newer after second reset for {}".format(sub1)
+
+    _, sub2 = _create_sub_pub_w_errors(publisher, subscriber, "test_tab2", "test_seq2")
+    assert (
+        subscriber.safe_psql(_ALL_NONZERO.format(sub2), dbname=_DB) == _SIX_T
+    ), "errors/conflicts > 0 and stats_reset NULL for {}".format(sub2)
+
+    # Reset all subscriptions.
+    subscriber.safe_psql("SELECT pg_stat_reset_subscription_stats(NULL)", dbname=_DB)
+    assert (
+        subscriber.safe_psql(_ALL_ZERO.format(sub1), dbname=_DB) == _SIX_T
+    ), "errors/conflicts 0 after global reset for {}".format(sub1)
+    assert (
+        subscriber.safe_psql(_ALL_ZERO.format(sub2), dbname=_DB) == _SIX_T
+    ), "errors/conflicts 0 after global reset for {}".format(sub2)
+
+    # A second global reset must move stats_reset forward for both.
+    reset_time1 = _reset_time(subscriber, sub1)
+    reset_time2 = _reset_time(subscriber, sub2)
+    subscriber.safe_psql("SELECT pg_stat_reset_subscription_stats(NULL)", dbname=_DB)
+    for sub, when in ((sub1, reset_time1), (sub2, reset_time2)):
+        assert (
+            subscriber.safe_psql(
+                "SELECT stats_reset > '{}'::timestamptz FROM "
+                "pg_stat_subscription_stats WHERE subname = '{}'".format(when, sub),
+                dbname=_DB,
+            )
+            == "t"
+        ), "reset timestamp newer after second global reset for {}".format(sub)
+
+    # Look the OID up before the drop; it is needed for pg_stat_have_stats().
+    sub1_oid = subscriber.safe_psql(
+        "SELECT oid FROM pg_subscription WHERE subname = '{}'".format(sub1), dbname=_DB
+    )
+    subscriber.safe_psql("DROP SUBSCRIPTION {}".format(sub1), dbname=_DB)
+    assert (
+        subscriber.safe_psql(
+            "SELECT pg_stat_have_stats('subscription', 0, {})".format(sub1_oid),
+            dbname=_DB,
+        )
+        == "f"
+    ), "subscription stats for {} removed".format(sub1)
+
+    sub2_oid = subscriber.safe_psql(
+        "SELECT oid FROM pg_subscription WHERE subname = '{}'".format(sub2), dbname=_DB
+    )
+    # The second subscription is dropped with slot_name = NONE, so its slot
+    # is removed by hand on the publisher at the end of the test.
+    subscriber.safe_psql(
+        "ALTER SUBSCRIPTION {0} DISABLE;\n"
+        "ALTER SUBSCRIPTION {0} SET (slot_name = NONE);\n"
+        "DROP SUBSCRIPTION {0};".format(sub2),
+        dbname=_DB,
+    )
+    assert (
+        subscriber.safe_psql(
+            "SELECT pg_stat_have_stats('subscription', 0, {})".format(sub2_oid),
+            dbname=_DB,
+        )
+        == "f"
+    ), "subscription stats for {} removed".format(sub2)
+
+    # The slot is looked up by the subscription name.
+    assert publisher.poll_query_until(
+        "SELECT EXISTS (SELECT 1 FROM pg_replication_slots "
+        "WHERE slot_name = '{}' AND active_pid IS NULL)".format(sub2),
+        dbname=_DB,
+    ), "slot became inactive"
+    publisher.safe_psql(
+        "SELECT pg_drop_replication_slot('{}')".format(sub2), dbname=_DB
+    )
+
+    subscriber.stop("fast")
+    publisher.stop("fast")
diff --git a/src/test/subscription/pyt/test_027_nosuperuser.py b/src/test/subscription/pyt/test_027_nosuperuser.py
new file mode 100644
index 0000000000000..d1785cbf1eeb0
--- /dev/null
+++ b/src/test/subscription/pyt/test_027_nosuperuser.py
@@ -0,0 +1,295 @@
+# Copyright (c) 2021-2026, PostgreSQL Global Development Group
+
+"""Port of src/test/subscription/t/027_nosuperuser.pl.
+
+Logical replication respects permissions: superuser vs role privileges, RLS,
+table-owner privileges, apply-worker restart on superuser revocation, and the
+password_required connection-string requirement for non-superusers.
+"""
+
+import os
+import re
+
+# Table used by most of the checks below.
+_TBL = "alice.unpartitioned"
+
+
+def _publish(publisher, sql):
+    """Run *sql* on the publisher after switching to role regress_alice."""
+    publisher.safe_psql("SET SESSION AUTHORIZATION regress_alice;\n" + sql)
+
+
+def _agg(subscriber, tbl):
+    """Return safe_psql's output for COUNT(i), MIN(i), MAX(i) over *tbl*."""
+    return subscriber.safe_psql("SELECT COUNT(i), MIN(i), MAX(i) FROM {}".format(tbl))
+
+
+def _expect_replication(publisher, subscriber, tbl, cnt, mn, mx, msg):
+    """Wait for 'admin_sub' to catch up, then assert *tbl* is "cnt|mn|mx".
+
+    *msg* is the assertion message.  The subscription name is hard-coded.
+    """
+    publisher.wait_for_catchup("admin_sub")
+    assert _agg(subscriber, tbl) == "{}|{}|{}".format(cnt, mn, mx), msg
+
+
+def _expect_failure(subscriber, tbl, offset, cnt, mn, mx, pattern, msg):
+    """Wait for *pattern* in the subscriber log, then assert *tbl* is unchanged.
+
+    The log is searched from *offset*; the value returned by ``wait_for_log``
+    is returned so the caller can pass it to the next check.  The table must
+    still aggregate to "cnt|mn|mx"; *msg* is the assertion message.
+    """
+    offset = subscriber.wait_for_log(pattern, offset)
+    assert _agg(subscriber, tbl) == "{}|{}|{}".format(cnt, mn, mx), msg
+    return offset
+
+
+def _setup(publisher, subscriber, connstr):
+    """Create roles, schema, tables, publication and subscription 'admin_sub'.
+
+    The same roles and tables are created on both nodes; publication 'alice'
+    is created on the publisher and subscription 'admin_sub' on the subscriber
+    using *connstr*.  Returns once the subscription has synced.
+    """
+    # The hash partitions get swapped remainders on the two nodes:
+    # hashpart_a/hashpart_b use REMAINDER 0/1 on the publisher and 1/0 on the
+    # subscriber.
+    for node, rem_a, rem_b in ((publisher, 0, 1), (subscriber, 1, 0)):
+        node.safe_psql(
+            "CREATE ROLE regress_admin SUPERUSER LOGIN;\n"
+            "CREATE ROLE regress_alice NOSUPERUSER LOGIN;\n"
+            "GRANT CREATE ON DATABASE postgres TO regress_alice;\n"
+            "GRANT PG_CREATE_SUBSCRIPTION TO regress_alice;\n"
+            "SET SESSION AUTHORIZATION regress_alice;\n"
+            "CREATE SCHEMA alice;\n"
+            "GRANT USAGE ON SCHEMA alice TO regress_admin;\n"
+            "CREATE TABLE alice.unpartitioned (i INTEGER);\n"
+            "ALTER TABLE alice.unpartitioned REPLICA IDENTITY FULL;\n"
+            "GRANT SELECT ON TABLE alice.unpartitioned TO regress_admin;\n"
+            "CREATE TABLE alice.hashpart (i INTEGER) PARTITION BY HASH (i);\n"
+            "ALTER TABLE alice.hashpart REPLICA IDENTITY FULL;\n"
+            "GRANT SELECT ON TABLE alice.hashpart TO regress_admin;\n"
+            "CREATE TABLE alice.hashpart_a PARTITION OF alice.hashpart "
+            "FOR VALUES WITH (MODULUS 2, REMAINDER {});\n"
+            "ALTER TABLE alice.hashpart_a REPLICA IDENTITY FULL;\n"
+            "CREATE TABLE alice.hashpart_b PARTITION OF alice.hashpart "
+            "FOR VALUES WITH (MODULUS 2, REMAINDER {});\n"
+            "ALTER TABLE alice.hashpart_b REPLICA IDENTITY FULL;".format(rem_a, rem_b)
+        )
+    publisher.safe_psql(
+        "SET SESSION AUTHORIZATION regress_alice;\n"
+        "CREATE PUBLICATION alice FOR TABLE alice.unpartitioned, alice.hashpart "
+        "WITH (publish_via_partition_root = true);"
+    )
+    subscriber.safe_psql(
+        "SET SESSION AUTHORIZATION regress_admin;\n"
+        "CREATE SUBSCRIPTION admin_sub CONNECTION '{}' PUBLICATION alice "
+        "WITH (password_required=false);".format(connstr)
+    )
+    subscriber.wait_for_subscription_sync(publisher, "admin_sub")
+
+
+def _rls_and_owner_privs(publisher, subscriber, offset):
+    """Check forced RLS and table-owner privilege handling on the subscriber.
+
+    Alternates between states in which replication into alice.unpartitioned
+    must fail (forced row-level security, revoked owner privileges) and
+    states in which it must succeed again.  *offset* is the subscriber log
+    position to search from; the updated position is returned.
+    """
+    perm_denied = r"ERROR: ( [A-Z0-9]+:)? permission denied for table unpartitioned"
+    rls = (
+        r'ERROR: ( [A-Z0-9]+:)? user "regress_alice" cannot replicate into relation '
+        r'with row-level security enabled: "unpartitioned\w*"'
+    )
+    subscriber.safe_psql(
+        "SET SESSION AUTHORIZATION regress_alice;\n"
+        "ALTER TABLE alice.unpartitioned ENABLE ROW LEVEL SECURITY;\n"
+        "ALTER TABLE alice.unpartitioned FORCE ROW LEVEL SECURITY;"
+    )
+    _publish(publisher, "INSERT INTO {} (i) VALUES (15);".format(_TBL))
+    offset = _expect_failure(
+        subscriber, _TBL, offset, 2, 11, 13, rls, "insert into forced-rls table fails"
+    )
+    subscriber.safe_psql("ALTER TABLE alice.unpartitioned NO FORCE ROW LEVEL SECURITY;")
+    _expect_replication(
+        publisher, subscriber, _TBL, 3, 11, 15, "insert replicates if rls not forced"
+    )
+    subscriber.safe_psql("ALTER TABLE alice.unpartitioned FORCE ROW LEVEL SECURITY;")
+    _publish(publisher, "UPDATE {} SET i = 17 WHERE i = 11;".format(_TBL))
+    offset = _expect_failure(
+        subscriber, _TBL, offset, 3, 11, 15, rls, "update into forced-rls table fails"
+    )
+    subscriber.safe_psql("ALTER TABLE alice.unpartitioned NO FORCE ROW LEVEL SECURITY;")
+    _expect_replication(
+        publisher, subscriber, _TBL, 3, 13, 17, "update replicates if rls not forced"
+    )
+
+    # Owner privileges: replication must fail while regress_alice lacks
+    # INSERT on its own table.
+    subscriber.safe_psql(
+        "REVOKE SELECT, INSERT ON alice.unpartitioned FROM regress_alice;"
+    )
+    _publish(publisher, "INSERT INTO {} (i) VALUES (19);".format(_TBL))
+    offset = _expect_failure(
+        subscriber,
+        _TBL,
+        offset,
+        3,
+        13,
+        17,
+        perm_denied,
+        "insert fails without owner insert",
+    )
+    subscriber.safe_psql("GRANT INSERT ON alice.unpartitioned TO regress_alice;")
+    _expect_replication(
+        publisher, subscriber, _TBL, 4, 13, 19, "restoring insert permits replication"
+    )
+
+    subscriber.safe_psql(
+        "REVOKE UPDATE, DELETE ON alice.unpartitioned FROM regress_alice;"
+    )
+    _publish(publisher, "UPDATE {} SET i = 21 WHERE i = 13;".format(_TBL))
+    _publish(publisher, "DELETE FROM {} WHERE i = 15;".format(_TBL))
+    offset = _expect_failure(
+        subscriber,
+        _TBL,
+        offset,
+        4,
+        13,
+        19,
+        perm_denied,
+        "update/delete fails without perm",
+    )
+    subscriber.safe_psql(
+        "GRANT UPDATE, DELETE ON alice.unpartitioned TO regress_alice;"
+    )
+    # SELECT was revoked above and has not been granted back yet, so the
+    # pending update/delete must still fail.
+    offset = _expect_failure(
+        subscriber,
+        _TBL,
+        offset,
+        4,
+        13,
+        19,
+        perm_denied,
+        "update/delete fails without SELECT",
+    )
+    subscriber.safe_psql("GRANT SELECT ON alice.unpartitioned TO regress_alice;")
+    _expect_replication(
+        publisher, subscriber, _TBL, 3, 17, 21, "restoring SELECT permits replication"
+    )
+    return offset
+
+
+def _password_required(create_pg):
+    """A non-superuser sub owner must give a password in the connection string.
+
+    Uses its own pair of nodes ("publisher1"/"subscriber1").  PGPASSWORD is
+    set for the duration of the first check and restored to its previous
+    state afterwards.
+    """
+    publisher = create_pg("publisher1", allows_streaming="logical")
+    subscriber = create_pg("subscriber1")
+    base = publisher.connstr() + " user=regress_test_user dbname=postgres"
+    # connstr1 has no password; connstr2 carries it in the connection string.
+    connstr1 = base
+    connstr2 = base + " password=secret"
+
+    for node in (publisher, subscriber):
+        node.safe_psql(
+            "CREATE ROLE regress_test_user PASSWORD 'secret' LOGIN REPLICATION;\n"
+            "GRANT CREATE ON DATABASE postgres TO regress_test_user;\n"
+            "GRANT PG_CREATE_SUBSCRIPTION TO regress_test_user;"
+        )
+    publisher.safe_psql(
+        "SET SESSION AUTHORIZATION regress_test_user;\n"
+        "CREATE PUBLICATION regress_test_pub;"
+    )
+    subscriber.safe_psql(
+        "CREATE SUBSCRIPTION regress_test_sub CONNECTION '{}' "
+        "PUBLICATION regress_test_pub;".format(connstr1)
+    )
+    subscriber.wait_for_subscription_sync(publisher, "regress_test_sub")
+
+    # Remember the caller's PGPASSWORD so the finally block can restore it.
+    save_pw = os.environ.get("PGPASSWORD")
+    os.environ["PGPASSWORD"] = "secret"
+    try:
+        # Require a password for regress_test_user on the publisher.
+        # Mode "w" replaces the whole pg_hba.conf with this single rule.
+        with open(publisher.datadir / "pg_hba.conf", "w", encoding="utf-8") as hba:
+            hba.write("local all regress_test_user md5\n")
+        publisher.reload()
+        subscriber.safe_psql(
+            "ALTER SUBSCRIPTION regress_test_sub OWNER TO regress_test_user;"
+        )
+
+        result = subscriber.psql_capture(
+            "SET SESSION AUTHORIZATION regress_test_user;\n"
+            "ALTER SUBSCRIPTION regress_test_sub REFRESH PUBLICATION;"
+        )
+        assert result.rc != 0, "non-superuser owner without password fails"
+        assert re.search(
+            r"DETAIL: Non-superusers must provide a password in the connection "
+            r"string\.",
+            result.stderr,
+        ), "error requires a password in the connection string"
+    finally:
+        if save_pw is None:
+            os.environ.pop("PGPASSWORD", None)
+        else:
+            os.environ["PGPASSWORD"] = save_pw
+
+    result = subscriber.psql_capture(
+        "SET SESSION AUTHORIZATION regress_test_user;\n"
+        "ALTER SUBSCRIPTION regress_test_sub CONNECTION '{}';\n"
+        "ALTER SUBSCRIPTION regress_test_sub REFRESH PUBLICATION;".format(connstr2)
+    )
+    assert result.rc == 0, "refresh succeeds once the password is supplied"
+
+
+def test_nosuperuser(create_pg):
+    """Permission, RLS and password-required semantics for logical replication.
+
+    The expected "count|min|max" values of alice.unpartitioned are cumulative:
+    every step builds on the rows left behind by the previous one, so the
+    order of the statements matters.
+    """
+    publisher = create_pg("publisher", allows_streaming="logical")
+    subscriber = create_pg("subscriber")
+    connstr = publisher.connstr() + " dbname=postgres"
+    _setup(publisher, subscriber, connstr)
+    # Subscriber log position; advanced by every _expect_failure() call.
+    offset = 0
+
+    # Superuser admin can replicate.
+    _publish(publisher, "INSERT INTO {} (i) VALUES (1);".format(_TBL))
+    _publish(publisher, "INSERT INTO {} (i) VALUES (3);".format(_TBL))
+    _publish(publisher, "INSERT INTO {} (i) VALUES (5);".format(_TBL))
+    _publish(publisher, "UPDATE {} SET i = 7 WHERE i = 1;".format(_TBL))
+    _publish(publisher, "DELETE FROM {} WHERE i = 3;".format(_TBL))
+    _expect_replication(publisher, subscriber, _TBL, 2, 5, 7, "superuser replicates")
+
+    # Revoke superuser: SET ROLE fails until restored.
+    subscriber.safe_psql("ALTER ROLE regress_admin NOSUPERUSER")
+    _publish(publisher, "UPDATE {} SET i = 9 WHERE i = 5;".format(_TBL))
+    offset = _expect_failure(
+        subscriber,
+        _TBL,
+        offset,
+        2,
+        5,
+        7,
+        r'ERROR: ( [A-Z0-9]+:)? role "regress_admin" cannot SET ROLE to '
+        r'"regress_alice"',
+        "non-superuser admin fails to replicate update",
+    )
+    subscriber.safe_psql("ALTER ROLE regress_admin SUPERUSER")
+    _expect_replication(
+        publisher, subscriber, _TBL, 2, 7, 9, "restored superuser replicates update"
+    )
+
+    # Privileges on the target role suffice for a non-superuser.
+    subscriber.safe_psql(
+        "ALTER ROLE regress_admin NOSUPERUSER;\nGRANT regress_alice TO regress_admin;"
+    )
+    _publish(publisher, "INSERT INTO {} (i) VALUES (11);".format(_TBL))
+    _expect_replication(publisher, subscriber, _TBL, 3, 7, 11, "nosuperuser INSERT")
+    _publish(publisher, "UPDATE {} SET i = 13 WHERE i = 7;".format(_TBL))
+    _expect_replication(publisher, subscriber, _TBL, 3, 9, 13, "nosuperuser UPDATE")
+    _publish(publisher, "DELETE FROM {} WHERE i = 9;".format(_TBL))
+    _expect_replication(publisher, subscriber, _TBL, 2, 11, 13, "nosuperuser DELETE")
+
+    # Partitioned table.
+    _publish(publisher, "INSERT INTO alice.hashpart (i) VALUES (101);")
+    _publish(publisher, "INSERT INTO alice.hashpart (i) VALUES (102);")
+    _publish(publisher, "INSERT INTO alice.hashpart (i) VALUES (103);")
+    _publish(publisher, "UPDATE alice.hashpart SET i = 120 WHERE i = 102;")
+    _publish(publisher, "DELETE FROM alice.hashpart WHERE i = 101;")
+    _expect_replication(
+        publisher,
+        subscriber,
+        "alice.hashpart",
+        2,
+        103,
+        120,
+        "nosuperuser into hashpart",
+    )
+
+    offset = _rls_and_owner_privs(publisher, subscriber, offset)
+
+    # Apply worker restarts when the subscription owner loses superuser.
+    subscriber.safe_psql("ALTER ROLE regress_alice SUPERUSER")
+    subscriber.safe_psql(
+        "SET SESSION AUTHORIZATION regress_alice;\n"
+        "CREATE SUBSCRIPTION regression_sub CONNECTION '{}' PUBLICATION alice;".format(
+            connstr
+        )
+    )
+    subscriber.wait_for_subscription_sync(publisher, "regression_sub")
+    # Only match log lines written after the revocation below.
+    offset = subscriber.current_log_position()
+    subscriber.safe_psql("ALTER ROLE regress_alice NOSUPERUSER")
+    subscriber.wait_for_log(
+        r"LOG: ( [A-Z0-9]+:)? logical replication worker for subscription "
+        r'"regression_sub" will restart because the subscription owner\'s '
+        r"superuser privileges have been revoked",
+        offset,
+    )
+
+    _password_required(create_pg)
diff --git a/src/test/subscription/pyt/test_028_row_filter.py b/src/test/subscription/pyt/test_028_row_filter.py
new file mode 100644
index 0000000000000..b642b4ce533d9
--- /dev/null
+++ b/src/test/subscription/pyt/test_028_row_filter.py
@@ -0,0 +1,499 @@
+# Copyright (c) 2022-2026, PostgreSQL Global Development Group
+
+# pylint: disable=line-too-long,too-many-statements
+"""Port of src/test/subscription/t/028_row_filter.pl.
+
+Row filters in logical replication publications: validation of
+WHERE clauses (replica identity, columns, expressions), combining filters
+across publications (OR), partitioned tables with publish_via_partition_root,
+TOAST, inheritance, and initial-sync vs incremental behavior.
+Generated from the Perl original via .agent/gen_golden.py.
+""" + + +def test_028_row_filter(create_pg): + """Generated golden port of 028_row_filter.""" + node_publisher = create_pg("publisher", allows_streaming="logical", start=False) + node_publisher.start() + node_subscriber = create_pg("subscriber", start=False) + node_subscriber.start() + publisher_connstr = node_publisher.connstr() + " dbname=postgres" + appname = "tap_sub" + node_publisher.safe_psql("CREATE TABLE tab_rf_x (x int primary key)") + node_subscriber.safe_psql("CREATE TABLE tab_rf_x (x int primary key)") + node_publisher.safe_psql( + "INSERT INTO tab_rf_x (x) VALUES (0), (5), (10), (15), (20)" + ) + node_publisher.safe_psql( + "CREATE PUBLICATION tap_pub_x FOR TABLE tab_rf_x WHERE (x > 10)" + ) + node_publisher.safe_psql("CREATE PUBLICATION tap_pub_forall FOR ALL TABLES") + node_subscriber.safe_psql( + "CREATE SUBSCRIPTION tap_sub CONNECTION '" + + publisher_connstr + + " application_name=" + + appname + + "' PUBLICATION tap_pub_x, tap_pub_forall" + ) + node_subscriber.wait_for_subscription_sync() + result = node_subscriber.safe_psql("SELECT count(x) FROM tab_rf_x") + assert ( + result == "5" + ), "check initial data copy from table tab_rf_x should not be filtered" + node_publisher.safe_psql("INSERT INTO tab_rf_x (x) VALUES (-99), (99)") + node_publisher.wait_for_catchup(appname) + result = node_subscriber.safe_psql("SELECT count(x) FROM tab_rf_x") + assert result == "7", "check table tab_rf_x should not be filtered" + node_publisher.safe_psql("DROP PUBLICATION tap_pub_forall") + node_publisher.safe_psql("DROP PUBLICATION tap_pub_x") + node_publisher.safe_psql("DROP TABLE tab_rf_x") + node_subscriber.safe_psql("DROP SUBSCRIPTION tap_sub") + node_subscriber.safe_psql("DROP TABLE tab_rf_x") + node_publisher.safe_psql("CREATE SCHEMA schema_rf_x") + node_publisher.safe_psql("CREATE TABLE schema_rf_x.tab_rf_x (x int primary key)") + node_publisher.safe_psql( + "CREATE TABLE schema_rf_x.tab_rf_partitioned (x int primary key) PARTITION BY RANGE(x)" + ) + 
node_publisher.safe_psql( + "CREATE TABLE public.tab_rf_partition (LIKE schema_rf_x.tab_rf_partitioned)" + ) + node_publisher.safe_psql( + "ALTER TABLE schema_rf_x.tab_rf_partitioned ATTACH PARTITION public.tab_rf_partition DEFAULT" + ) + node_subscriber.safe_psql("CREATE SCHEMA schema_rf_x") + node_subscriber.safe_psql("CREATE TABLE schema_rf_x.tab_rf_x (x int primary key)") + node_subscriber.safe_psql( + "CREATE TABLE schema_rf_x.tab_rf_partitioned (x int primary key) PARTITION BY RANGE(x)" + ) + node_subscriber.safe_psql( + "CREATE TABLE public.tab_rf_partition (LIKE schema_rf_x.tab_rf_partitioned)" + ) + node_subscriber.safe_psql( + "ALTER TABLE schema_rf_x.tab_rf_partitioned ATTACH PARTITION public.tab_rf_partition DEFAULT" + ) + node_publisher.safe_psql( + "INSERT INTO schema_rf_x.tab_rf_x (x) VALUES (0), (5), (10), (15), (20)" + ) + node_publisher.safe_psql( + "INSERT INTO schema_rf_x.tab_rf_partitioned (x) VALUES (1), (20)" + ) + node_publisher.safe_psql( + "CREATE PUBLICATION tap_pub_x FOR TABLE schema_rf_x.tab_rf_x WHERE (x > 10)" + ) + node_publisher.safe_psql( + "CREATE PUBLICATION tap_pub_allinschema FOR TABLES IN SCHEMA schema_rf_x, TABLE schema_rf_x.tab_rf_x WHERE (x > 10)" + ) + node_publisher.safe_psql( + "ALTER PUBLICATION tap_pub_allinschema ADD TABLE public.tab_rf_partition WHERE (x > 10)" + ) + node_subscriber.safe_psql( + "CREATE SUBSCRIPTION tap_sub CONNECTION '" + + publisher_connstr + + " application_name=" + + appname + + "' PUBLICATION tap_pub_x, tap_pub_allinschema" + ) + node_subscriber.wait_for_subscription_sync() + result = node_subscriber.safe_psql("SELECT count(x) FROM schema_rf_x.tab_rf_x") + assert ( + result == "5" + ), "check initial data copy from table tab_rf_x should not be filtered" + node_publisher.safe_psql("INSERT INTO schema_rf_x.tab_rf_x (x) VALUES (-99), (99)") + node_publisher.safe_psql( + "INSERT INTO schema_rf_x.tab_rf_partitioned (x) VALUES (5), (25)" + ) + node_publisher.wait_for_catchup(appname) + result = 
node_subscriber.safe_psql("SELECT count(x) FROM schema_rf_x.tab_rf_x") + assert result == "7", "check table tab_rf_x should not be filtered" + result = node_subscriber.safe_psql("SELECT * FROM public.tab_rf_partition") + assert result == "20\n25", "check table tab_rf_partition should be filtered" + node_publisher.safe_psql("DROP PUBLICATION tap_pub_allinschema") + node_publisher.safe_psql("DROP PUBLICATION tap_pub_x") + node_publisher.safe_psql("DROP TABLE public.tab_rf_partition") + node_publisher.safe_psql("DROP TABLE schema_rf_x.tab_rf_partitioned") + node_publisher.safe_psql("DROP TABLE schema_rf_x.tab_rf_x") + node_publisher.safe_psql("DROP SCHEMA schema_rf_x") + node_subscriber.safe_psql("DROP SUBSCRIPTION tap_sub") + node_subscriber.safe_psql("DROP TABLE public.tab_rf_partition") + node_subscriber.safe_psql("DROP TABLE schema_rf_x.tab_rf_partitioned") + node_subscriber.safe_psql("DROP TABLE schema_rf_x.tab_rf_x") + node_subscriber.safe_psql("DROP SCHEMA schema_rf_x") + node_publisher.safe_psql("CREATE TABLE tab_rowfilter_1 (a int primary key, b text)") + node_publisher.safe_psql("ALTER TABLE tab_rowfilter_1 REPLICA IDENTITY FULL;") + node_publisher.safe_psql("CREATE TABLE tab_rowfilter_2 (c int primary key)") + node_publisher.safe_psql( + "CREATE TABLE tab_rowfilter_3 (a int primary key, b boolean)" + ) + node_publisher.safe_psql("CREATE TABLE tab_rowfilter_4 (c int primary key)") + node_publisher.safe_psql( + "CREATE TABLE tab_rowfilter_partitioned (a int primary key, b integer) PARTITION BY RANGE(a)" + ) + node_publisher.safe_psql( + "CREATE TABLE tab_rowfilter_less_10k (LIKE tab_rowfilter_partitioned)" + ) + node_publisher.safe_psql( + "ALTER TABLE tab_rowfilter_partitioned ATTACH PARTITION tab_rowfilter_less_10k FOR VALUES FROM (MINVALUE) TO (10000)" + ) + node_publisher.safe_psql( + "CREATE TABLE tab_rowfilter_greater_10k (LIKE tab_rowfilter_partitioned)" + ) + node_publisher.safe_psql( + "ALTER TABLE tab_rowfilter_partitioned ATTACH PARTITION 
tab_rowfilter_greater_10k FOR VALUES FROM (10000) TO (MAXVALUE)" + ) + node_publisher.safe_psql( + "CREATE TABLE tab_rowfilter_partitioned_2 (a int primary key, b integer) PARTITION BY RANGE(a)" + ) + node_publisher.safe_psql( + "CREATE TABLE tab_rowfilter_partition (LIKE tab_rowfilter_partitioned_2)" + ) + node_publisher.safe_psql( + "ALTER TABLE tab_rowfilter_partitioned_2 ATTACH PARTITION tab_rowfilter_partition DEFAULT" + ) + node_publisher.safe_psql( + "CREATE TABLE tab_rowfilter_toast (a text NOT NULL, b text NOT NULL)" + ) + node_publisher.safe_psql( + "ALTER TABLE tab_rowfilter_toast ALTER COLUMN a SET STORAGE EXTERNAL" + ) + node_publisher.safe_psql( + "CREATE UNIQUE INDEX tab_rowfilter_toast_ri_index on tab_rowfilter_toast (a, b)" + ) + node_publisher.safe_psql( + "ALTER TABLE tab_rowfilter_toast REPLICA IDENTITY USING INDEX tab_rowfilter_toast_ri_index" + ) + node_publisher.safe_psql("CREATE TABLE tab_rowfilter_inherited (a int)") + node_publisher.safe_psql( + "CREATE TABLE tab_rowfilter_child (b text) INHERITS (tab_rowfilter_inherited)" + ) + node_publisher.safe_psql( + "CREATE TABLE tab_rowfilter_viaroot_part (a int) PARTITION BY RANGE (a)" + ) + node_publisher.safe_psql( + "CREATE TABLE tab_rowfilter_viaroot_part_1 PARTITION OF tab_rowfilter_viaroot_part FOR VALUES FROM (1) TO (20)" + ) + node_publisher.safe_psql( + "CREATE TABLE tab_rowfilter_parent_sync (a int) PARTITION BY RANGE (a)" + ) + node_publisher.safe_psql( + "CREATE TABLE tab_rowfilter_child_sync PARTITION OF tab_rowfilter_parent_sync FOR VALUES FROM (1) TO (20)" + ) + node_publisher.safe_psql( + "CREATE TABLE tab_rowfilter_virtual (id int PRIMARY KEY, x int, y int GENERATED ALWAYS AS (x * 2) VIRTUAL)" + ) + node_subscriber.safe_psql( + "CREATE TABLE tab_rowfilter_1 (a int primary key, b text)" + ) + node_subscriber.safe_psql("CREATE TABLE tab_rowfilter_2 (c int primary key)") + node_subscriber.safe_psql( + "CREATE TABLE tab_rowfilter_3 (a int primary key, b boolean)" + ) + 
node_subscriber.safe_psql("CREATE TABLE tab_rowfilter_4 (c int primary key)") + node_subscriber.safe_psql( + "CREATE TABLE tab_rowfilter_partitioned (a int primary key, b integer) PARTITION BY RANGE(a)" + ) + node_subscriber.safe_psql( + "CREATE TABLE tab_rowfilter_less_10k (LIKE tab_rowfilter_partitioned)" + ) + node_subscriber.safe_psql( + "ALTER TABLE tab_rowfilter_partitioned ATTACH PARTITION tab_rowfilter_less_10k FOR VALUES FROM (MINVALUE) TO (10000)" + ) + node_subscriber.safe_psql( + "CREATE TABLE tab_rowfilter_greater_10k (LIKE tab_rowfilter_partitioned)" + ) + node_subscriber.safe_psql( + "ALTER TABLE tab_rowfilter_partitioned ATTACH PARTITION tab_rowfilter_greater_10k FOR VALUES FROM (10000) TO (MAXVALUE)" + ) + node_subscriber.safe_psql( + "CREATE TABLE tab_rowfilter_partitioned_2 (a int primary key, b integer) PARTITION BY RANGE(a)" + ) + node_subscriber.safe_psql( + "CREATE TABLE tab_rowfilter_partition (LIKE tab_rowfilter_partitioned_2)" + ) + node_subscriber.safe_psql( + "ALTER TABLE tab_rowfilter_partitioned_2 ATTACH PARTITION tab_rowfilter_partition DEFAULT" + ) + node_subscriber.safe_psql( + "CREATE TABLE tab_rowfilter_toast (a text NOT NULL, b text NOT NULL)" + ) + node_subscriber.safe_psql( + "CREATE UNIQUE INDEX tab_rowfilter_toast_ri_index on tab_rowfilter_toast (a, b)" + ) + node_subscriber.safe_psql( + "ALTER TABLE tab_rowfilter_toast REPLICA IDENTITY USING INDEX tab_rowfilter_toast_ri_index" + ) + node_subscriber.safe_psql("CREATE TABLE tab_rowfilter_inherited (a int)") + node_subscriber.safe_psql( + "CREATE TABLE tab_rowfilter_child (b text) INHERITS (tab_rowfilter_inherited)" + ) + node_subscriber.safe_psql("CREATE TABLE tab_rowfilter_viaroot_part (a int)") + node_subscriber.safe_psql("CREATE TABLE tab_rowfilter_viaroot_part_1 (a int)") + node_subscriber.safe_psql("CREATE TABLE tab_rowfilter_parent_sync (a int)") + node_subscriber.safe_psql("CREATE TABLE tab_rowfilter_child_sync (a int)") + node_subscriber.safe_psql( + "CREATE TABLE 
tab_rowfilter_virtual (id int PRIMARY KEY, x int, y int GENERATED ALWAYS AS (x * 2) VIRTUAL)" + ) + node_publisher.safe_psql( + "CREATE PUBLICATION tap_pub_1 FOR TABLE tab_rowfilter_1 WHERE (a > 1000 AND b <> 'filtered')" + ) + node_publisher.safe_psql( + "ALTER PUBLICATION tap_pub_1 ADD TABLE tab_rowfilter_2 WHERE (c % 7 = 0)" + ) + node_publisher.safe_psql( + "ALTER PUBLICATION tap_pub_1 SET TABLE tab_rowfilter_1 WHERE (a > 1000 AND b <> 'filtered'), tab_rowfilter_2 WHERE (c % 2 = 0), tab_rowfilter_3" + ) + node_publisher.safe_psql( + "CREATE PUBLICATION tap_pub_2 FOR TABLE tab_rowfilter_2 WHERE (c % 3 = 0)" + ) + node_publisher.safe_psql( + "CREATE PUBLICATION tap_pub_3 FOR TABLE tab_rowfilter_partitioned" + ) + node_publisher.safe_psql( + "ALTER PUBLICATION tap_pub_3 ADD TABLE tab_rowfilter_less_10k WHERE (a < 6000)" + ) + node_publisher.safe_psql( + "CREATE PUBLICATION tap_pub_not_used FOR TABLE tab_rowfilter_1 WHERE (a < 0)" + ) + node_publisher.safe_psql( + "CREATE PUBLICATION tap_pub_4a FOR TABLE tab_rowfilter_4 WHERE (c % 2 = 0)" + ) + node_publisher.safe_psql("CREATE PUBLICATION tap_pub_4b FOR TABLE tab_rowfilter_4") + node_publisher.safe_psql( + "CREATE PUBLICATION tap_pub_5a FOR TABLE tab_rowfilter_partitioned_2" + ) + node_publisher.safe_psql( + "CREATE PUBLICATION tap_pub_5b FOR TABLE tab_rowfilter_partition WHERE (a > 10)" + ) + node_publisher.safe_psql( + "CREATE PUBLICATION tap_pub_toast FOR TABLE tab_rowfilter_toast WHERE (a = repeat('1234567890', 200) AND b < '10')" + ) + node_publisher.safe_psql( + "CREATE PUBLICATION tap_pub_inherits FOR TABLE tab_rowfilter_inherited WHERE (a > 15)" + ) + node_publisher.safe_psql( + "CREATE PUBLICATION tap_pub_viaroot_1 FOR TABLE tab_rowfilter_viaroot_part WHERE (a > 15) WITH (publish_via_partition_root)" + ) + node_publisher.safe_psql( + "CREATE PUBLICATION tap_pub_viaroot_2 FOR TABLE tab_rowfilter_viaroot_part_1 WHERE (a < 15) WITH (publish_via_partition_root)" + ) + node_publisher.safe_psql( + "CREATE 
PUBLICATION tap_pub_parent_sync FOR TABLE tab_rowfilter_parent_sync WHERE (a > 15) WITH (publish_via_partition_root)" + ) + node_publisher.safe_psql( + "CREATE PUBLICATION tap_pub_child_sync FOR TABLE tab_rowfilter_child_sync WHERE (a < 15)" + ) + node_publisher.safe_psql( + "CREATE PUBLICATION tap_pub_virtual FOR TABLE tab_rowfilter_virtual WHERE (y > 10)" + ) + node_publisher.safe_psql( + "INSERT INTO tab_rowfilter_1 (a, b) VALUES (1, 'not replicated')" + ) + node_publisher.safe_psql( + "INSERT INTO tab_rowfilter_1 (a, b) VALUES (1500, 'filtered')" + ) + node_publisher.safe_psql( + "INSERT INTO tab_rowfilter_1 (a, b) VALUES (1980, 'not filtered')" + ) + node_publisher.safe_psql( + "INSERT INTO tab_rowfilter_1 (a, b) SELECT x, 'test ' || x FROM generate_series(990,1002) x" + ) + node_publisher.safe_psql( + "INSERT INTO tab_rowfilter_2 (c) SELECT generate_series(1, 20)" + ) + node_publisher.safe_psql( + "INSERT INTO tab_rowfilter_3 (a, b) SELECT x, (x % 3 = 0) FROM generate_series(1, 10) x" + ) + node_publisher.safe_psql( + "INSERT INTO tab_rowfilter_4 (c) SELECT generate_series(1, 10)" + ) + node_publisher.safe_psql( + "INSERT INTO tab_rowfilter_parent_sync(a) VALUES(14), (16)" + ) + node_publisher.safe_psql( + "INSERT INTO tab_rowfilter_partitioned (a, b) VALUES(1, 100),(7000, 101),(15000, 102),(5500, 300)" + ) + node_publisher.safe_psql( + "INSERT INTO tab_rowfilter_less_10k (a, b) VALUES(2, 200),(6005, 201)" + ) + node_publisher.safe_psql( + "INSERT INTO tab_rowfilter_greater_10k (a, b) VALUES(16000, 103)" + ) + node_publisher.safe_psql( + "INSERT INTO tab_rowfilter_partitioned_2 (a, b) VALUES(1, 1),(20, 20)" + ) + node_publisher.safe_psql( + "INSERT INTO tab_rowfilter_toast(a, b) VALUES(repeat('1234567890', 200), '1234567890')" + ) + node_publisher.safe_psql("INSERT INTO tab_rowfilter_inherited(a) VALUES(10),(20)") + node_publisher.safe_psql( + "INSERT INTO tab_rowfilter_child(a, b) VALUES(0,'0'),(30,'30'),(40,'40')" + ) + node_publisher.safe_psql( + "INSERT 
INTO tab_rowfilter_virtual (id, x) VALUES (1, 2), (2, 4), (3, 6)" + ) + node_subscriber.safe_psql( + "CREATE SUBSCRIPTION tap_sub CONNECTION '" + + publisher_connstr + + " application_name=" + + appname + + "' PUBLICATION tap_pub_1, tap_pub_2, tap_pub_3, tap_pub_4a, tap_pub_4b, tap_pub_5a, tap_pub_5b, tap_pub_toast, tap_pub_inherits, tap_pub_viaroot_2, tap_pub_viaroot_1, tap_pub_parent_sync, tap_pub_child_sync, tap_pub_virtual" + ) + node_subscriber.wait_for_subscription_sync() + result = node_subscriber.safe_psql("SELECT a, b FROM tab_rowfilter_1 ORDER BY 1, 2") + assert ( + result == "1001|test 1001\n1002|test 1002\n1980|not filtered" + ), "check initial data copy from table tab_rowfilter_1" + result = node_subscriber.safe_psql( + "SELECT count(c), min(c), max(c) FROM tab_rowfilter_2" + ) + assert result == "13|2|20", "check initial data copy from table tab_rowfilter_2" + result = node_subscriber.safe_psql( + "SELECT count(c), min(c), max(c) FROM tab_rowfilter_4" + ) + assert result == "10|1|10", "check initial data copy from table tab_rowfilter_4" + result = node_subscriber.safe_psql("SELECT count(a) FROM tab_rowfilter_3") + assert result == "10", "check initial data copy from table tab_rowfilter_3" + result = node_subscriber.safe_psql( + "SELECT a, b FROM tab_rowfilter_less_10k ORDER BY 1, 2" + ) + assert ( + result == "1|100\n2|200\n5500|300" + ), "check initial data copy from partition tab_rowfilter_less_10k" + result = node_subscriber.safe_psql( + "SELECT a, b FROM tab_rowfilter_greater_10k ORDER BY 1, 2" + ) + assert ( + result == "15000|102\n16000|103" + ), "check initial data copy from partition tab_rowfilter_greater_10k" + result = node_subscriber.safe_psql( + "SELECT a, b FROM tab_rowfilter_partition ORDER BY 1, 2" + ) + assert ( + result == "1|1\n20|20" + ), "check initial data copy from partition tab_rowfilter_partition" + result = node_subscriber.safe_psql("SELECT count(*) FROM tab_rowfilter_toast") + assert result == "0", "check initial data copy 
from table tab_rowfilter_toast" + result = node_subscriber.safe_psql( + "SELECT a FROM tab_rowfilter_inherited ORDER BY a" + ) + assert ( + result == "20\n30\n40" + ), "check initial data copy from table tab_rowfilter_inherited" + result = node_subscriber.safe_psql( + "SELECT a FROM tab_rowfilter_parent_sync ORDER BY 1" + ) + assert result == "16", "check initial data copy from tab_rowfilter_parent_sync" + result = node_subscriber.safe_psql( + "SELECT a FROM tab_rowfilter_child_sync ORDER BY 1" + ) + assert result == "", "check initial data copy from tab_rowfilter_child_sync" + result = node_subscriber.safe_psql( + "SELECT id, x FROM tab_rowfilter_virtual ORDER BY id" + ) + assert result == "3|6", "check initial data copy from table tab_rowfilter_virtual" + node_publisher.safe_psql( + "INSERT INTO tab_rowfilter_1 (a, b) VALUES (800, 'test 800')" + ) + node_publisher.safe_psql( + "INSERT INTO tab_rowfilter_1 (a, b) VALUES (1600, 'test 1600')" + ) + node_publisher.safe_psql( + "INSERT INTO tab_rowfilter_1 (a, b) VALUES (1601, 'test 1601')" + ) + node_publisher.safe_psql( + "INSERT INTO tab_rowfilter_1 (a, b) VALUES (1602, 'filtered')" + ) + node_publisher.safe_psql( + "INSERT INTO tab_rowfilter_1 (a, b) VALUES (1700, 'test 1700')" + ) + node_publisher.safe_psql("UPDATE tab_rowfilter_1 SET b = NULL WHERE a = 1600") + node_publisher.safe_psql( + "UPDATE tab_rowfilter_1 SET b = 'test 1601 updated' WHERE a = 1601" + ) + node_publisher.safe_psql( + "UPDATE tab_rowfilter_1 SET b = 'test 1602 updated' WHERE a = 1602" + ) + node_publisher.safe_psql("DELETE FROM tab_rowfilter_1 WHERE a = 1700") + node_publisher.safe_psql( + "INSERT INTO tab_rowfilter_2 (c) VALUES (21), (22), (23), (24), (25)" + ) + node_publisher.safe_psql("INSERT INTO tab_rowfilter_4 (c) VALUES (0), (11), (12)") + node_publisher.safe_psql( + "INSERT INTO tab_rowfilter_inherited (a) VALUES (14), (16)" + ) + node_publisher.safe_psql( + "INSERT INTO tab_rowfilter_child (a, b) VALUES (13, '13'), (17, '17')" + ) 
+ node_publisher.safe_psql( + "INSERT INTO tab_rowfilter_viaroot_part (a) VALUES (14), (15), (16)" + ) + node_publisher.safe_psql( + "INSERT INTO tab_rowfilter_virtual (id, x) VALUES (4, 3), (5, 7)" + ) + node_publisher.wait_for_catchup(appname) + result = node_subscriber.safe_psql( + "SELECT count(c), min(c), max(c) FROM tab_rowfilter_2" + ) + assert result == "16|2|24", "check replicated rows to tab_rowfilter_2" + result = node_subscriber.safe_psql( + "SELECT count(c), min(c), max(c) FROM tab_rowfilter_4" + ) + assert result == "13|0|12", "check replicated rows to tab_rowfilter_4" + result = node_subscriber.safe_psql("SELECT a, b FROM tab_rowfilter_1 ORDER BY 1, 2") + assert ( + result + == "1001|test 1001\n1002|test 1002\n1601|test 1601 updated\n1602|test 1602 updated\n1980|not filtered" + ), "check replicated rows to table tab_rowfilter_1" + node_publisher.safe_psql( + "ALTER PUBLICATION tap_pub_3 SET (publish_via_partition_root = true)" + ) + node_publisher.safe_psql( + "ALTER PUBLICATION tap_pub_3 SET TABLE tab_rowfilter_partitioned WHERE (a < 5000), tab_rowfilter_less_10k WHERE (a < 6000)" + ) + node_subscriber.safe_psql("TRUNCATE TABLE tab_rowfilter_partitioned") + node_subscriber.safe_psql( + "ALTER SUBSCRIPTION tap_sub REFRESH PUBLICATION WITH (copy_data = true)" + ) + node_subscriber.wait_for_subscription_sync() + node_publisher.safe_psql( + "INSERT INTO tab_rowfilter_partitioned (a, b) VALUES(4000, 400),(4001, 401),(4002, 402)" + ) + node_publisher.safe_psql( + "INSERT INTO tab_rowfilter_less_10k (a, b) VALUES(4500, 450)" + ) + node_publisher.safe_psql( + "INSERT INTO tab_rowfilter_less_10k (a, b) VALUES(5600, 123)" + ) + node_publisher.safe_psql( + "INSERT INTO tab_rowfilter_greater_10k (a, b) VALUES(14000, 1950)" + ) + node_publisher.safe_psql("UPDATE tab_rowfilter_less_10k SET b = 30 WHERE a = 4001") + node_publisher.safe_psql("DELETE FROM tab_rowfilter_less_10k WHERE a = 4002") + node_publisher.wait_for_catchup(appname) + result = 
node_subscriber.safe_psql( + "SELECT a, b FROM tab_rowfilter_partitioned ORDER BY 1, 2" + ) + assert ( + result == "1|100\n2|200\n4000|400\n4001|30\n4500|450" + ), "check publish_via_partition_root behavior" + result = node_subscriber.safe_psql( + "SELECT a FROM tab_rowfilter_inherited ORDER BY a" + ) + assert ( + result == "16\n17\n20\n30\n40" + ), "check replicated rows to tab_rowfilter_inherited and tab_rowfilter_child" + result = node_subscriber.safe_psql( + "SELECT id, x FROM tab_rowfilter_virtual ORDER BY id" + ) + assert result == "3|6\n5|7", "check replicated rows to tab_rowfilter_virtual" + node_publisher.safe_psql("UPDATE tab_rowfilter_toast SET b = '1'") + node_publisher.wait_for_catchup(appname) + result = node_subscriber.safe_psql( + "SELECT a = repeat('1234567890', 200), b FROM tab_rowfilter_toast" + ) + assert result == "t|1", "check replicated rows to tab_rowfilter_toast" + result = node_subscriber.safe_psql("SELECT a FROM tab_rowfilter_viaroot_part") + assert result == "16", "check replicated rows to tab_rowfilter_viaroot_part" + result = node_subscriber.safe_psql("SELECT a FROM tab_rowfilter_viaroot_part_1") + assert result == "", "check replicated rows to tab_rowfilter_viaroot_part_1" + node_subscriber.stop("fast") + node_publisher.stop("fast") diff --git a/src/test/subscription/pyt/test_029_on_error.py b/src/test/subscription/pyt/test_029_on_error.py new file mode 100644 index 0000000000000..445fec952b471 --- /dev/null +++ b/src/test/subscription/pyt/test_029_on_error.py @@ -0,0 +1,132 @@ +# Copyright (c) 2021-2026, PostgreSQL Global Development Group + +"""Port of src/test/subscription/t/029_on_error.pl. + +disable_on_error and ALTER SUBSCRIPTION ... SKIP transaction features. +""" + +import re + +from pypg import slurp_file + +# Matches the apply-conflict error block, capturing the error transaction's +# finish LSN. 
+_ERROR_LSN = ( + r'conflict detected on relation "public.tbl".*\n' + r".*DETAIL:.* Could not apply remote change.*\n" + r'.*Key already exists in unique index "tbl_pkey", modified by .*origin.* ' + r"in transaction \d+ at .*: key .*, local row .*\n" + r'.*CONTEXT:.* for replication target relation "public.tbl" in ' + r"transaction \d+, finished at ([0-9A-Fa-f]+/[0-9A-Fa-f]+)" +) + + +def _test_skip_lsn(publisher, subscriber, offset, nonconflict_data, expected, msg): + # Wait until a conflict disables the subscription. + subscriber.poll_query_until( + "SELECT subenabled = FALSE FROM pg_subscription WHERE subname = 'sub'" + ) + + # Get the finish LSN of the error transaction from the server log. + match = re.search(_ERROR_LSN, slurp_file(subscriber.log, offset)) + assert match, "could not get error-LSN" + lsn = match.group(1) + + subscriber.safe_psql("ALTER SUBSCRIPTION sub SKIP (lsn = '{}')".format(lsn)) + subscriber.safe_psql("ALTER SUBSCRIPTION sub ENABLE") + subscriber.poll_query_until( + "SELECT subskiplsn = '0/0' FROM pg_subscription WHERE subname = 'sub'" + ) + + offset = subscriber.wait_for_log( + r"LOG: ( [A-Z0-9]+:)? logical replication completed skipping " + r"transaction at LSN " + re.escape(lsn), + offset, + ) + + publisher.safe_psql("INSERT INTO tbl VALUES {}".format(nonconflict_data)) + publisher.wait_for_catchup("sub") + assert subscriber.safe_psql("SELECT count(*) FROM tbl") == expected, msg + return offset + + +def test_on_error(create_pg): + """disable_on_error then SKIP across normal, 2PC, and streamed conflicts.""" + publisher = create_pg("publisher", allows_streaming="logical", start=False) + publisher.append_conf( + "logical_decoding_work_mem = 64kB\nmax_prepared_transactions = 10" + ) + publisher.start() + subscriber = create_pg("subscriber", start=False) + subscriber.append_conf( + "max_prepared_transactions = 10\ntrack_commit_timestamp = on" + ) + subscriber.start() + + # The subscriber has a primary key and a preexisting conflicting row. 
+ publisher.safe_psql( + "CREATE TABLE tbl (i INT, t BYTEA);\n" + "ALTER TABLE tbl REPLICA IDENTITY FULL;\n" + "INSERT INTO tbl VALUES (1, NULL);" + ) + subscriber.safe_psql( + "CREATE TABLE tbl (i INT PRIMARY KEY, t BYTEA);\n" + "INSERT INTO tbl VALUES (1, NULL);" + ) + + connstr = publisher.connstr() + " dbname=postgres" + publisher.safe_psql("CREATE PUBLICATION pub FOR TABLE tbl") + subscriber.safe_psql( + "CREATE SUBSCRIPTION sub CONNECTION '{}' PUBLICATION pub " + "WITH (disable_on_error = true, streaming = on, two_phase = on)".format(connstr) + ) + + # Initial-sync uniqueness violation disables the subscription. + subscriber.poll_query_until( + "SELECT subenabled = false FROM pg_catalog.pg_subscription " + "WHERE subname = 'sub'" + ) + subscriber.safe_psql("TRUNCATE tbl") + subscriber.safe_psql("ALTER SUBSCRIPTION sub ENABLE") + subscriber.wait_for_subscription_sync(publisher, "sub") + assert ( + subscriber.safe_psql("SELECT COUNT(*) FROM tbl") == "1" + ), "subscription sub replicated data" + + offset = 0 + publisher.safe_psql("BEGIN;\nINSERT INTO tbl VALUES (1, NULL);\nCOMMIT;") + offset = _test_skip_lsn( + publisher, subscriber, offset, "(2, NULL)", "2", "test skipping transaction" + ) + + publisher.safe_psql( + "BEGIN;\nUPDATE tbl SET i = 2;\nPREPARE TRANSACTION 'gtx';\n" + "COMMIT PREPARED 'gtx';" + ) + offset = _test_skip_lsn( + publisher, + subscriber, + offset, + "(3, NULL)", + "3", + "test skipping prepare and commit prepared ", + ) + + publisher.safe_psql( + "BEGIN;\n" + "INSERT INTO tbl SELECT i, sha256(i::text::bytea) " + "FROM generate_series(1, 10000) s(i);\n" + "COMMIT;" + ) + _test_skip_lsn( + publisher, + subscriber, + offset, + "(4, sha256(4::text::bytea))", + "4", + "test skipping stream-commit", + ) + + assert ( + subscriber.safe_psql("SELECT COUNT(*) FROM pg_prepared_xacts") == "0" + ), "check all prepared transactions are resolved on the subscriber" diff --git a/src/test/subscription/pyt/test_030_origin.py 
b/src/test/subscription/pyt/test_030_origin.py new file mode 100644 index 0000000000000..3c185ad6e0206 --- /dev/null +++ b/src/test/subscription/pyt/test_030_origin.py @@ -0,0 +1,215 @@ +# Copyright (c) 2021-2026, PostgreSQL Global Development Group + +"""Port of src/test/subscription/t/030_origin.pl. + +CREATE SUBSCRIPTION 'origin' parameter and its interaction with 'copy_data'. +""" + +import re + +_TAB_UNQUOTED = "tab'le" +_TAB = '"tab\'le"' + +_AB = "tap_sub_A_B" +_AB2 = "tap_sub_A_B_2" +_BA = "tap_sub_B_A" +_BC = "tap_sub_B_C" + + +def _warn_copy(subname): + return ( + r'WARNING: ( [A-Z0-9]+:)? subscription "{}" requested copy_data with ' + r"origin = NONE but might copy data that had a different origin".format(subname) + ) + + +def _rows(node): + return node.safe_psql("SELECT * FROM {} ORDER BY 1;".format(_TAB)) + + +def _setup_bidir(node_a, node_b, a_connstr, b_connstr): + node_a.safe_psql("CREATE TABLE {} (a int PRIMARY KEY)".format(_TAB)) + node_b.safe_psql("CREATE TABLE {} (a int PRIMARY KEY)".format(_TAB)) + + node_a.safe_psql("CREATE PUBLICATION tap_pub_A FOR TABLE {}".format(_TAB)) + node_b.safe_psql( + "CREATE SUBSCRIPTION {0} CONNECTION '{1} application_name={0}' " + "PUBLICATION tap_pub_A WITH (origin = none)".format(_BA, a_connstr) + ) + node_b.safe_psql("CREATE PUBLICATION tap_pub_B FOR TABLE {}".format(_TAB)) + node_a.safe_psql( + "CREATE SUBSCRIPTION {0} CONNECTION '{1} application_name={0}' " + "PUBLICATION tap_pub_B WITH (origin = none, copy_data = off)".format( + _AB, b_connstr + ) + ) + node_a.wait_for_subscription_sync(node_b, _AB) + node_b.wait_for_subscription_sync(node_a, _BA) + + +def _check_no_recursion_and_origin(node_a, node_b, node_c, c_connstr): + node_a.safe_psql("INSERT INTO {} VALUES (11);".format(_TAB)) + node_b.safe_psql("INSERT INTO {} VALUES (21);".format(_TAB)) + node_a.wait_for_catchup(_BA) + node_b.wait_for_catchup(_AB) + assert _rows(node_a) == "11\n21", "no infinite recursion (node_A)" + assert _rows(node_b) == 
"11\n21", "no infinite recursion (node_B)" + + node_a.safe_psql("DELETE FROM {};".format(_TAB)) + node_a.wait_for_catchup(_BA) + node_b.wait_for_catchup(_AB) + assert _rows(node_a) == "", "Check existing data" + assert _rows(node_b) == "", "Check existing data" + + # node_C -> node_B; its data must not reach node_A (origin = none). + node_c.safe_psql("CREATE TABLE {} (a int PRIMARY KEY)".format(_TAB)) + node_c.safe_psql("CREATE PUBLICATION tap_pub_C FOR TABLE {}".format(_TAB)) + node_b.safe_psql( + "CREATE SUBSCRIPTION {0} CONNECTION '{1} application_name={0}' " + "PUBLICATION tap_pub_C WITH (origin = none)".format(_BC, c_connstr) + ) + node_b.wait_for_subscription_sync(node_c, _BC) + + node_c.safe_psql("INSERT INTO {} VALUES (32);".format(_TAB)) + node_c.wait_for_catchup(_BC) + node_b.wait_for_catchup(_AB) + node_a.wait_for_catchup(_BA) + assert _rows(node_b) == "32", "node_C data replicated to node_B" + assert _rows(node_a) == "", "remote data from another node not replicated" + + +def _check_conflicts(node_a, node_b, node_c): + node_b.safe_psql("DELETE FROM {};".format(_TAB)) + node_a.safe_psql("INSERT INTO {} VALUES (32);".format(_TAB)) + node_a.wait_for_catchup(_BA) + node_b.wait_for_catchup(_AB) + assert _rows(node_b) == "32", "node_A data replicated to node_B" + + node_c.safe_psql("UPDATE {} SET a = 33 WHERE a = 32;".format(_TAB)) + node_b.wait_for_log( + r'conflict detected on relation "public.' + _TAB_UNQUOTED + r'": ' + r"conflict=update_origin_differs.*\n.*DETAIL:.* Updating the row that " + r'was modified by a different origin ".*" in transaction [0-9]+ at .*: ' + r"local row \(32\), remote row \(33\), replica identity \(a\)=\(32\)\." 
+ ) + + node_b.safe_psql("DELETE FROM {};".format(_TAB)) + node_a.safe_psql("INSERT INTO {} VALUES (33);".format(_TAB)) + node_a.wait_for_catchup(_BA) + node_b.wait_for_catchup(_AB) + assert _rows(node_b) == "33", "node_A data replicated to node_B" + + node_c.safe_psql("DELETE FROM {} WHERE a = 33;".format(_TAB)) + node_b.wait_for_log( + r'conflict detected on relation "public.' + _TAB_UNQUOTED + r'": ' + r"conflict=delete_origin_differs.*\n.*DETAIL:.* Deleting the row that " + r'was modified by a different origin ".*" in transaction [0-9]+ at .*: ' + r"local row \(33\), replica identity \(a\)=\(33\).*" + ) + + +def _check_origin_warnings(node_a, node_b, b_connstr): + result = node_a.psql_capture( + "CREATE SUBSCRIPTION {0} CONNECTION '{1} application_name={0}' " + "PUBLICATION tap_pub_B WITH (origin = none, copy_data = on)".format( + _AB2, b_connstr + ) + ) + assert re.search( + _warn_copy("tap_sub_a_b_2"), result.stderr + ), "warn on copy_data with origin=none when publisher subscribes same table" + node_a.wait_for_subscription_sync(node_b, _AB2) + + node_a.safe_psql("ALTER SUBSCRIPTION {} REFRESH PUBLICATION".format(_AB2)) + + node_a.safe_psql("CREATE TABLE tab_new (a int PRIMARY KEY)") + node_b.safe_psql("CREATE TABLE tab_new (a int PRIMARY KEY)") + node_a.safe_psql("ALTER PUBLICATION tap_pub_A ADD TABLE tab_new") + node_b.safe_psql("ALTER SUBSCRIPTION {} REFRESH PUBLICATION".format(_BA)) + node_b.wait_for_subscription_sync(node_a, _BA) + node_b.safe_psql("ALTER PUBLICATION tap_pub_B ADD TABLE tab_new") + + result = node_a.psql_capture( + "ALTER SUBSCRIPTION {} REFRESH PUBLICATION".format(_AB2) + ) + assert re.search( + _warn_copy("tap_sub_a_b_2"), result.stderr + ), "warn on refresh when new table subscribes from a different publication" + + synced = ( + "SELECT count(1) = 0 FROM pg_subscription_rel WHERE srsubstate NOT IN ('r');" + ) + assert node_a.poll_query_until(synced), "subscriber synchronized" + node_b.wait_for_catchup(_AB2) + + node_a.safe_psql( + 
"DROP TABLE tab_new;\nDROP SUBSCRIPTION {};\nDROP SUBSCRIPTION {};\n" + "DROP PUBLICATION tap_pub_A;".format(_AB2, _AB) + ) + node_b.safe_psql( + "DROP TABLE tab_new;\nDROP SUBSCRIPTION {};\n" + "DROP PUBLICATION tap_pub_B;".format(_BA) + ) + + +def _check_partition_warnings(node_a, node_b, node_c, a_connstr, b_connstr): + node_a.safe_psql( + "CREATE TABLE tab_part2(a int);\n" + "CREATE PUBLICATION tap_pub_A FOR TABLE tab_part2;" + ) + node_b.safe_psql( + "CREATE TABLE tab_main(a int) PARTITION BY RANGE(a);\n" + "CREATE TABLE tab_part1 PARTITION OF tab_main FOR VALUES FROM (0) TO (5);\n" + "CREATE TABLE tab_part2(a int) PARTITION BY RANGE(a);\n" + "CREATE TABLE tab_part2_1 PARTITION OF tab_part2 FOR VALUES FROM (5) TO (10);\n" + "ALTER TABLE tab_main ATTACH PARTITION tab_part2 FOR VALUES FROM (5) to (10);\n" + "CREATE SUBSCRIPTION tap_sub_A_B CONNECTION '{}' " + "PUBLICATION tap_pub_A;".format(a_connstr) + ) + node_c.safe_psql("CREATE TABLE tab_main(a int);\nCREATE TABLE tab_part2_1(a int);") + node_b.safe_psql( + "CREATE PUBLICATION tap_pub_B FOR TABLE tab_main " + "WITH (publish_via_partition_root);\n" + "CREATE PUBLICATION tap_pub_B_2 FOR TABLE tab_part2_1;" + ) + + for pub, why in ( + ("tap_pub_B", "publisher's partition subscribes from a different origin"), + ("tap_pub_B_2", "publisher's ancestor subscribes from a different origin"), + ): + result = node_c.psql_capture( + "CREATE SUBSCRIPTION tap_sub_B_C CONNECTION '{}' PUBLICATION {} " + "WITH (origin = none, copy_data = on);".format(b_connstr, pub) + ) + assert re.search(_warn_copy("tap_sub_b_c"), result.stderr), why + node_c.safe_psql("DROP SUBSCRIPTION tap_sub_B_C") + + node_b.safe_psql( + "DROP SUBSCRIPTION tap_sub_A_B;\nDROP PUBLICATION tap_pub_B;\n" + "DROP PUBLICATION tap_pub_B_2;\nDROP TABLE tab_main;" + ) + node_a.safe_psql("DROP PUBLICATION tap_pub_A;\nDROP TABLE tab_part2;") + + +def test_origin(create_pg): + """origin=none bidirectional replication, conflict detection, copy warnings.""" + node_a = 
create_pg("node_A", allows_streaming="logical") + node_b = create_pg("node_B", allows_streaming="logical", start=False) + node_b.append_conf("track_commit_timestamp = on") + node_b.start() + node_c = create_pg("node_C", allows_streaming="logical") + + a_connstr = node_a.connstr() + " dbname=postgres" + b_connstr = node_b.connstr() + " dbname=postgres" + c_connstr = node_c.connstr() + " dbname=postgres" + + _setup_bidir(node_a, node_b, a_connstr, b_connstr) + _check_no_recursion_and_origin(node_a, node_b, node_c, c_connstr) + _check_conflicts(node_a, node_b, node_c) + + # The remaining tests no longer exercise conflict detection. + node_b.append_conf("track_commit_timestamp = off") + node_b.restart() + + _check_origin_warnings(node_a, node_b, b_connstr) + _check_partition_warnings(node_a, node_b, node_c, a_connstr, b_connstr) diff --git a/src/test/subscription/pyt/test_031_column_list.py b/src/test/subscription/pyt/test_031_column_list.py new file mode 100644 index 0000000000000..a5390bb177ffe --- /dev/null +++ b/src/test/subscription/pyt/test_031_column_list.py @@ -0,0 +1,505 @@ +# Copyright (c) 2022-2026, PostgreSQL Global Development Group + +# pylint: disable=line-too-long,too-many-statements +"""Port of src/test/subscription/t/031_column_list.pl. + +Partial-column publication of tables (column lists), including weird column +names, fewer columns on the subscriber, partitioned tables, enum types, +publish_via_partition_root, and detection of conflicting column lists across +publications. Generated from the Perl original via .agent/gen_golden.py with +the error-detection tail hand-finished. 
+""" + +import re + + +def test_031_column_list(create_pg): + """Generated golden port of 031_column_list.""" + node_publisher = create_pg("publisher", allows_streaming="logical", start=False) + node_publisher.start() + node_subscriber = create_pg("subscriber", start=False) + node_subscriber.append_conf("max_logical_replication_workers = 6") + node_subscriber.start() + publisher_connstr = node_publisher.connstr() + " dbname=postgres" + node_publisher.safe_psql('CREATE TABLE tab1 (a int PRIMARY KEY, "B" int, c int)') + node_subscriber.safe_psql('CREATE TABLE tab1 (a int PRIMARY KEY, "B" int, c int)') + node_publisher.safe_psql("CREATE TABLE tab2 (a int PRIMARY KEY, b varchar, c int);") + node_subscriber.safe_psql("CREATE TABLE tab2 (a int PRIMARY KEY, b varchar)") + node_publisher.safe_psql( + 'CREATE TABLE tab3 ("a\'" int PRIMARY KEY, "B" varchar, "c\'" int)' + ) + node_subscriber.safe_psql('CREATE TABLE tab3 ("a\'" int PRIMARY KEY, "c\'" int)') + node_publisher.safe_psql( + "CREATE TABLE test_part (a int PRIMARY KEY, b text, c timestamptz) PARTITION BY LIST (a);\n\tCREATE TABLE test_part_1_1 PARTITION OF test_part FOR VALUES IN (1,2,3,4,5,6);\n\tCREATE TABLE test_part_2_1 PARTITION OF test_part FOR VALUES IN (7,8,9,10,11,12) PARTITION BY LIST (a);\n\tCREATE TABLE test_part_2_2 PARTITION OF test_part_2_1 FOR VALUES IN (7,8,9,10);" + ) + node_subscriber.safe_psql( + "CREATE TABLE test_part (a int PRIMARY KEY, b text) PARTITION BY LIST (a);\n\tCREATE TABLE test_part_1_1 PARTITION OF test_part FOR VALUES IN (1,2,3,4,5,6);\n\tCREATE TABLE test_part_2_1 PARTITION OF test_part FOR VALUES IN (7,8,9,10,11,12) PARTITION BY LIST (a);\n\tCREATE TABLE test_part_2_2 PARTITION OF test_part_2_1 FOR VALUES IN (7,8,9,10);" + ) + node_publisher.safe_psql( + "CREATE TYPE test_typ AS ENUM ('blue', 'red');\n\tCREATE TABLE tab4 (a INT PRIMARY KEY, b test_typ, c int, d text);" + ) + node_subscriber.safe_psql( + "CREATE TYPE test_typ AS ENUM ('blue', 'red');\n\tCREATE TABLE tab4 (a INT 
PRIMARY KEY, b test_typ, d text);" + ) + node_publisher.safe_psql( + 'CREATE PUBLICATION pub1\n\t FOR TABLE tab1 (a, "B"), tab3 ("a\'", "c\'"), test_part (a, b), tab4 (a, b, d)\n\t WITH (publish_via_partition_root = \'true\');' + ) + result = node_publisher.safe_psql( + "SELECT relname, prattrs\n\tFROM pg_publication_rel pb JOIN pg_class pc ON(pb.prrelid = pc.oid)\n\tORDER BY relname" + ) + assert ( + result == "tab1|1 2\ntab3|1 3\ntab4|1 2 4\ntest_part|1 2" + ), "publication relation updated" + node_publisher.safe_psql( + "INSERT INTO tab1 VALUES (1, 2, 3);\n\tINSERT INTO tab1 VALUES (4, 5, 6);" + ) + node_publisher.safe_psql( + "INSERT INTO tab3 VALUES (1, 2, 3);\n\tINSERT INTO tab3 VALUES (4, 5, 6);" + ) + node_publisher.safe_psql( + "INSERT INTO tab4 VALUES (1, 'red', 3, 'oh my');\n\tINSERT INTO tab4 VALUES (2, 'blue', 4, 'hello');" + ) + node_publisher.safe_psql( + "INSERT INTO test_part VALUES (1, 'abc', '2021-07-04 12:00:00');\n\tINSERT INTO test_part VALUES (2, 'bcd', '2021-07-03 11:12:13');\n\tINSERT INTO test_part VALUES (7, 'abc', '2021-07-04 12:00:00');\n\tINSERT INTO test_part VALUES (8, 'bcd', '2021-07-03 11:12:13');" + ) + node_subscriber.safe_psql( + "CREATE SUBSCRIPTION sub1 CONNECTION '" + + publisher_connstr + + "' PUBLICATION pub1" + ) + node_subscriber.wait_for_subscription_sync() + result = node_subscriber.safe_psql("SELECT * FROM tab1 ORDER BY a") + assert result == "1|2|\n4|5|", "insert on column tab1.c is not replicated" + result = node_subscriber.safe_psql('SELECT * FROM tab3 ORDER BY "a\'"') + assert result == "1|3\n4|6", "insert on column tab3.b is not replicated" + result = node_subscriber.safe_psql("SELECT * FROM tab4 ORDER BY a") + assert ( + result == "1|red|oh my\n2|blue|hello" + ), "insert on column tab4.c is not replicated" + result = node_subscriber.safe_psql("SELECT * FROM test_part ORDER BY a") + assert ( + result == "1|abc\n2|bcd\n7|abc\n8|bcd" + ), "insert on column test_part.c columns is not replicated" + 
node_publisher.safe_psql( + "INSERT INTO tab1 VALUES (2, 3, 4);\n\tINSERT INTO tab1 VALUES (5, 6, 7);" + ) + node_publisher.safe_psql( + "INSERT INTO tab3 VALUES (2, 3, 4);\n\tINSERT INTO tab3 VALUES (5, 6, 7);" + ) + node_publisher.safe_psql( + "INSERT INTO tab4 VALUES (3, 'red', 5, 'foo');\n\tINSERT INTO tab4 VALUES (4, 'blue', 6, 'bar');" + ) + node_publisher.safe_psql( + "INSERT INTO test_part VALUES (3, 'xxx', '2022-02-01 10:00:00');\n\tINSERT INTO test_part VALUES (4, 'yyy', '2022-03-02 15:12:13');\n\tINSERT INTO test_part VALUES (9, 'zzz', '2022-04-03 21:00:00');\n\tINSERT INTO test_part VALUES (10, 'qqq', '2022-05-04 22:12:13');" + ) + node_publisher.wait_for_catchup("sub1") + result = node_subscriber.safe_psql("SELECT * FROM tab1 ORDER BY a") + assert ( + result == "1|2|\n2|3|\n4|5|\n5|6|" + ), "insert on column tab1.c is not replicated" + result = node_subscriber.safe_psql('SELECT * FROM tab3 ORDER BY "a\'"') + assert result == "1|3\n2|4\n4|6\n5|7", "insert on column tab3.b is not replicated" + result = node_subscriber.safe_psql("SELECT * FROM tab4 ORDER BY a") + assert ( + result == "1|red|oh my\n2|blue|hello\n3|red|foo\n4|blue|bar" + ), "insert on column tab4.c is not replicated" + result = node_subscriber.safe_psql("SELECT * FROM test_part ORDER BY a") + assert ( + result == "1|abc\n2|bcd\n3|xxx\n4|yyy\n7|abc\n8|bcd\n9|zzz\n10|qqq" + ), "insert on column test_part.c columns is not replicated" + node_publisher.safe_psql('UPDATE tab1 SET "B" = 2 * "B" where a = 1') + node_publisher.safe_psql("UPDATE tab1 SET c = 2*c where a = 4") + node_publisher.safe_psql( + 'UPDATE tab3 SET "B" = "B" || \' updated\' where "a\'" = 4' + ) + node_publisher.safe_psql('UPDATE tab3 SET "c\'" = 2 * "c\'" where "a\'" = 1') + node_publisher.safe_psql( + "UPDATE tab4 SET b = 'blue', c = c * 2, d = d || ' updated' where a = 1" + ) + node_publisher.safe_psql( + "UPDATE tab4 SET b = 'red', c = c * 2, d = d || ' updated' where a = 2" + ) + node_publisher.wait_for_catchup("sub1") + 
result = node_subscriber.safe_psql("SELECT * FROM tab1 ORDER BY a") + assert ( + result == "1|4|\n2|3|\n4|5|\n5|6|" + ), "only update on column tab1.b is replicated" + result = node_subscriber.safe_psql('SELECT * FROM tab3 ORDER BY "a\'"') + assert result == "1|6\n2|4\n4|6\n5|7", "only update on column tab3.c is replicated" + result = node_subscriber.safe_psql("SELECT * FROM tab4 ORDER BY a") + assert ( + result == "1|blue|oh my updated\n2|red|hello updated\n3|red|foo\n4|blue|bar" + ), "update on column tab4.c is not replicated" + node_publisher.safe_psql("INSERT INTO tab2 VALUES (1, 'abc', 3);") + node_publisher.safe_psql("ALTER PUBLICATION pub1 ADD TABLE tab2 (a, b)") + node_subscriber.safe_psql("ALTER SUBSCRIPTION sub1 REFRESH PUBLICATION") + node_subscriber.wait_for_subscription_sync() + node_publisher.safe_psql("INSERT INTO tab2 VALUES (2, 'def', 6);") + node_publisher.wait_for_catchup("sub1") + result = node_subscriber.safe_psql("SELECT * FROM tab2 ORDER BY a") + assert result == "1|abc\n2|def", "insert on column tab2.c is not replicated" + node_publisher.safe_psql( + "UPDATE tab2 SET c = 5 where a = 1;\n\tUPDATE tab2 SET b = 'xyz' where a = 2;" + ) + node_publisher.wait_for_catchup("sub1") + result = node_subscriber.safe_psql("SELECT * FROM tab2 ORDER BY a") + assert result == "1|abc\n2|xyz", "update on column tab2.c is not replicated" + node_publisher.safe_psql( + "CREATE TABLE tab5 (a int PRIMARY KEY, b int, c int, d int);\n\tCREATE PUBLICATION pub2 FOR TABLE tab5 (a, b);\n\tCREATE PUBLICATION pub3 FOR TABLE tab5 (a, b);\n\n\t-- insert a couple initial records\n\tINSERT INTO tab5 VALUES (1, 11, 111, 1111);\n\tINSERT INTO tab5 VALUES (2, 22, 222, 2222);" + ) + node_subscriber.safe_psql("CREATE TABLE tab5 (a int PRIMARY KEY, b int, d int);") + node_subscriber.safe_psql( + "DROP SUBSCRIPTION sub1;\n\tCREATE SUBSCRIPTION sub1 CONNECTION '" + + publisher_connstr + + "' PUBLICATION pub2, pub3" + ) + node_subscriber.wait_for_subscription_sync() + 
node_publisher.safe_psql( + "INSERT INTO tab5 VALUES (3, 33, 333, 3333);\n\tINSERT INTO tab5 VALUES (4, 44, 444, 4444);" + ) + node_publisher.wait_for_catchup("sub1") + assert ( + node_subscriber.safe_psql("SELECT * FROM tab5 ORDER BY a") + == "1|11|\n2|22|\n3|33|\n4|44|" + ), "overlapping publications with overlapping column lists" + node_publisher.safe_psql( + "CREATE TABLE tab6 (a int PRIMARY KEY, b int, c int, d int);\n\tCREATE PUBLICATION pub4 FOR TABLE tab6 (a, b);\n\n\t-- initial data\n\tINSERT INTO tab6 VALUES (1, 22, 333, 4444);" + ) + node_subscriber.safe_psql( + "CREATE TABLE tab6 (a int PRIMARY KEY, b int, c int, d int);" + ) + node_subscriber.safe_psql( + "DROP SUBSCRIPTION sub1;\n\tCREATE SUBSCRIPTION sub1 CONNECTION '" + + publisher_connstr + + "' PUBLICATION pub4" + ) + node_subscriber.wait_for_subscription_sync() + node_publisher.safe_psql( + "INSERT INTO tab6 VALUES (2, 33, 444, 5555);\n\tUPDATE tab6 SET b = b * 2, c = c * 3, d = d * 4;" + ) + node_publisher.wait_for_catchup("sub1") + assert ( + node_subscriber.safe_psql("SELECT * FROM tab6 ORDER BY a") == "1|44||\n2|66||" + ), "replication with the original primary key" + node_publisher.safe_psql( + "ALTER TABLE tab6 DROP CONSTRAINT tab6_pkey;\n\tALTER TABLE tab6 ADD PRIMARY KEY (b);" + ) + node_subscriber.safe_psql( + "ALTER TABLE tab6 DROP CONSTRAINT tab6_pkey;\n\tALTER TABLE tab6 ADD PRIMARY KEY (b);" + ) + node_subscriber.safe_psql("ALTER SUBSCRIPTION sub1 REFRESH PUBLICATION") + node_subscriber.wait_for_subscription_sync() + node_publisher.safe_psql( + "INSERT INTO tab6 VALUES (3, 55, 666, 8888);\n\tUPDATE tab6 SET b = b * 2, c = c * 3, d = d * 4;" + ) + node_publisher.wait_for_catchup("sub1") + assert ( + node_subscriber.safe_psql("SELECT * FROM tab6 ORDER BY a") + == "1|88||\n2|132||\n3|110||" + ), "replication with the modified primary key" + node_publisher.safe_psql( + "CREATE TABLE tab7 (a int PRIMARY KEY, b int, c int, d int);\n\tCREATE PUBLICATION pub5 FOR TABLE tab7 (a, b);\n\n\t-- 
some initial data\n\tINSERT INTO tab7 VALUES (1, 22, 333, 4444);" + ) + node_subscriber.safe_psql( + "CREATE TABLE tab7 (a int PRIMARY KEY, b int, c int, d int);" + ) + node_subscriber.safe_psql( + "DROP SUBSCRIPTION sub1;\n\tCREATE SUBSCRIPTION sub1 CONNECTION '" + + publisher_connstr + + "' PUBLICATION pub5" + ) + node_subscriber.wait_for_subscription_sync() + node_publisher.safe_psql( + "INSERT INTO tab7 VALUES (2, 33, 444, 5555);\n\tUPDATE tab7 SET b = b * 2, c = c * 3, d = d * 4;" + ) + node_publisher.wait_for_catchup("sub1") + assert ( + node_subscriber.safe_psql("SELECT * FROM tab7 ORDER BY a") == "1|44||\n2|66||" + ), "replication with the original primary key" + node_publisher.safe_psql( + "ALTER TABLE tab7 DROP CONSTRAINT tab7_pkey;\n\tALTER TABLE tab7 ADD PRIMARY KEY (a, b);" + ) + node_publisher.safe_psql( + "INSERT INTO tab7 VALUES (3, 55, 666, 7777);\n\tUPDATE tab7 SET b = b * 2, c = c * 3, d = d * 4;" + ) + node_publisher.wait_for_catchup("sub1") + assert ( + node_subscriber.safe_psql("SELECT * FROM tab7 ORDER BY a") + == "1|88||\n2|132||\n3|110||" + ), "replication with the modified primary key" + node_publisher.safe_psql( + "ALTER TABLE tab7 DROP CONSTRAINT tab7_pkey;\n\tINSERT INTO tab7 VALUES (4, 77, 888, 9999);\n\t-- update/delete is not allowed for tables without RI\n\tALTER TABLE tab7 ADD PRIMARY KEY (b, a);\n\tUPDATE tab7 SET b = b * 2, c = c * 3, d = d * 4;\n\tDELETE FROM tab7 WHERE a = 1;" + ) + node_publisher.safe_psql("") + node_publisher.wait_for_catchup("sub1") + assert ( + node_subscriber.safe_psql("SELECT * FROM tab7 ORDER BY a") + == "2|264||\n3|220||\n4|154||" + ), "replication with the modified primary key" + node_publisher.safe_psql( + "CREATE TABLE test_part_a (a int, b int, c int) PARTITION BY LIST (a);\n\n\tCREATE TABLE test_part_a_1 PARTITION OF test_part_a FOR VALUES IN (1,2,3,4,5);\n\tALTER TABLE test_part_a_1 ADD PRIMARY KEY (a);\n\tALTER TABLE test_part_a_1 REPLICA IDENTITY USING INDEX test_part_a_1_pkey;\n\n\tCREATE TABLE 
test_part_a_2 PARTITION OF test_part_a FOR VALUES IN (6,7,8,9,10);\n\tALTER TABLE test_part_a_2 ADD PRIMARY KEY (b);\n\tALTER TABLE test_part_a_2 REPLICA IDENTITY USING INDEX test_part_a_2_pkey;\n\n\t-- initial data, one row in each partition\n\tINSERT INTO test_part_a VALUES (1, 3);\n\tINSERT INTO test_part_a VALUES (6, 4);" + ) + node_subscriber.safe_psql( + "CREATE TABLE test_part_a (b int, a int) PARTITION BY LIST (a);\n\n\tCREATE TABLE test_part_a_1 PARTITION OF test_part_a FOR VALUES IN (1,2,3,4,5);\n\tALTER TABLE test_part_a_1 ADD PRIMARY KEY (a);\n\tALTER TABLE test_part_a_1 REPLICA IDENTITY USING INDEX test_part_a_1_pkey;\n\n\tCREATE TABLE test_part_a_2 PARTITION OF test_part_a FOR VALUES IN (6,7,8,9,10);\n\tALTER TABLE test_part_a_2 ADD PRIMARY KEY (b);\n\tALTER TABLE test_part_a_2 REPLICA IDENTITY USING INDEX test_part_a_2_pkey;" + ) + node_publisher.safe_psql( + "CREATE PUBLICATION pub6 FOR TABLE test_part_a (b, a) WITH (publish_via_partition_root = true);\n\tALTER PUBLICATION pub6 ADD TABLE test_part_a_1 (a);\n\tALTER PUBLICATION pub6 ADD TABLE test_part_a_2 (b);" + ) + node_subscriber.safe_psql( + "DROP SUBSCRIPTION sub1;\n\tCREATE SUBSCRIPTION sub1 CONNECTION '" + + publisher_connstr + + "' PUBLICATION pub6" + ) + node_subscriber.wait_for_subscription_sync() + node_publisher.safe_psql( + "INSERT INTO test_part_a VALUES (2, 5);\n\tINSERT INTO test_part_a VALUES (7, 6);" + ) + node_publisher.wait_for_catchup("sub1") + assert ( + node_subscriber.safe_psql("SELECT a, b FROM test_part_a ORDER BY a, b") + == "1|3\n2|5\n6|4\n7|6" + ), "partitions with different replica identities not replicated correctly" + node_publisher.safe_psql( + "CREATE TABLE test_part_b (a int, b int) PARTITION BY LIST (a);\n\n\tCREATE TABLE test_part_b_1 PARTITION OF test_part_b FOR VALUES IN (1,2,3,4,5);\n\tALTER TABLE test_part_b_1 ADD PRIMARY KEY (a);\n\tALTER TABLE test_part_b_1 REPLICA IDENTITY USING INDEX test_part_b_1_pkey;\n\n\tCREATE TABLE test_part_b_2 PARTITION OF 
test_part_b FOR VALUES IN (6,7,8,9,10);\n\tALTER TABLE test_part_b_2 ADD PRIMARY KEY (b);\n\tALTER TABLE test_part_b_2 REPLICA IDENTITY USING INDEX test_part_b_2_pkey;\n\n\t-- initial data, one row in each partitions\n\tINSERT INTO test_part_b VALUES (1, 1);\n\tINSERT INTO test_part_b VALUES (6, 2);" + ) + node_subscriber.safe_psql( + "CREATE TABLE test_part_b (a int, b int) PARTITION BY LIST (a);\n\n\tCREATE TABLE test_part_b_1 PARTITION OF test_part_b FOR VALUES IN (1,2,3,4,5);\n\tALTER TABLE test_part_b_1 ADD PRIMARY KEY (a);\n\tALTER TABLE test_part_b_1 REPLICA IDENTITY USING INDEX test_part_b_1_pkey;\n\n\tCREATE TABLE test_part_b_2 PARTITION OF test_part_b FOR VALUES IN (6,7,8,9,10);\n\tALTER TABLE test_part_b_2 ADD PRIMARY KEY (b);\n\tALTER TABLE test_part_b_2 REPLICA IDENTITY USING INDEX test_part_b_2_pkey;" + ) + node_publisher.safe_psql( + "CREATE PUBLICATION pub7 FOR TABLE test_part_b (a, b) WITH (publish_via_partition_root = true);" + ) + node_subscriber.safe_psql( + "DROP SUBSCRIPTION sub1;\n\tCREATE SUBSCRIPTION sub1 CONNECTION '" + + publisher_connstr + + "' PUBLICATION pub7" + ) + node_subscriber.wait_for_subscription_sync() + node_publisher.safe_psql( + "INSERT INTO test_part_b VALUES (2, 3);\n\tINSERT INTO test_part_b VALUES (7, 4);" + ) + node_publisher.wait_for_catchup("sub1") + assert ( + node_subscriber.safe_psql("SELECT * FROM test_part_b ORDER BY a, b") + == "1|1\n2|3\n6|2\n7|4" + ), "partitions with different replica identities not replicated correctly" + node_publisher.safe_psql( + "CREATE TABLE test_part_c (a int, b int, c int) PARTITION BY LIST (a);\n\n\tCREATE TABLE test_part_c_1 PARTITION OF test_part_c FOR VALUES IN (1,3);\n\tALTER TABLE test_part_c_1 ADD PRIMARY KEY (a);\n\tALTER TABLE test_part_c_1 REPLICA IDENTITY USING INDEX test_part_c_1_pkey;\n\n\tCREATE TABLE test_part_c_2 PARTITION OF test_part_c FOR VALUES IN (2,4);\n\tALTER TABLE test_part_c_2 ADD PRIMARY KEY (b);\n\tALTER TABLE test_part_c_2 REPLICA IDENTITY USING INDEX 
test_part_c_2_pkey;\n\n\t-- initial data, one row for each partition\n\tINSERT INTO test_part_c VALUES (1, 3, 5);\n\tINSERT INTO test_part_c VALUES (2, 4, 6);" + ) + node_subscriber.safe_psql( + "CREATE TABLE test_part_c (a int, b int, c int) PARTITION BY LIST (a);\n\n\tCREATE TABLE test_part_c_1 PARTITION OF test_part_c FOR VALUES IN (1,3);\n\tALTER TABLE test_part_c_1 ADD PRIMARY KEY (a);\n\tALTER TABLE test_part_c_1 REPLICA IDENTITY USING INDEX test_part_c_1_pkey;\n\n\tCREATE TABLE test_part_c_2 PARTITION OF test_part_c FOR VALUES IN (2,4);\n\tALTER TABLE test_part_c_2 ADD PRIMARY KEY (b);\n\tALTER TABLE test_part_c_2 REPLICA IDENTITY USING INDEX test_part_c_2_pkey;" + ) + node_publisher.safe_psql( + "CREATE PUBLICATION pub8 FOR TABLE test_part_c WITH (publish_via_partition_root = false);\n\tALTER PUBLICATION pub8 ADD TABLE test_part_c_1 (a,c);\n\tALTER PUBLICATION pub8 ADD TABLE test_part_c_2 (a,b);" + ) + node_subscriber.safe_psql( + "DROP SUBSCRIPTION sub1;\n\tCREATE SUBSCRIPTION sub1 CONNECTION '" + + publisher_connstr + + "' PUBLICATION pub8;" + ) + node_subscriber.wait_for_subscription_sync() + node_publisher.safe_psql( + "INSERT INTO test_part_c VALUES (3, 7, 8);\n\tINSERT INTO test_part_c VALUES (4, 9, 10);" + ) + node_publisher.wait_for_catchup("sub1") + assert ( + node_subscriber.safe_psql("SELECT * FROM test_part_c ORDER BY a, b") + == "1||5\n2|4|\n3||8\n4|9|" + ), "partitions with different replica identities not replicated correctly" + node_publisher.safe_psql( + "DROP PUBLICATION pub8;\n\tCREATE PUBLICATION pub8 FOR TABLE test_part_c WITH (publish_via_partition_root = false);\n\tALTER PUBLICATION pub8 ADD TABLE test_part_c_1 (a);\n\tALTER PUBLICATION pub8 ADD TABLE test_part_c_2 (a,b);" + ) + node_subscriber.safe_psql( + "ALTER SUBSCRIPTION sub1 REFRESH PUBLICATION;\n\tTRUNCATE test_part_c;" + ) + node_subscriber.wait_for_subscription_sync() + node_publisher.safe_psql( + "TRUNCATE test_part_c;\n\tINSERT INTO test_part_c VALUES (1, 3, 5);\n\tINSERT 
INTO test_part_c VALUES (2, 4, 6);" + ) + node_publisher.wait_for_catchup("sub1") + assert ( + node_subscriber.safe_psql("SELECT * FROM test_part_c ORDER BY a, b") + == "1||\n2|4|" + ), "partitions with different replica identities not replicated correctly" + node_publisher.safe_psql( + "CREATE TABLE test_part_d (a int, b int) PARTITION BY LIST (a);\n\n\tCREATE TABLE test_part_d_1 PARTITION OF test_part_d FOR VALUES IN (1,3);\n\tALTER TABLE test_part_d_1 ADD PRIMARY KEY (a);\n\tALTER TABLE test_part_d_1 REPLICA IDENTITY USING INDEX test_part_d_1_pkey;\n\n\tINSERT INTO test_part_d VALUES (1, 2);" + ) + node_subscriber.safe_psql( + "CREATE TABLE test_part_d (a int, b int) PARTITION BY LIST (a);\n\n\tCREATE TABLE test_part_d_1 PARTITION OF test_part_d FOR VALUES IN (1,3);\n\tALTER TABLE test_part_d_1 ADD PRIMARY KEY (a);\n\tALTER TABLE test_part_d_1 REPLICA IDENTITY USING INDEX test_part_d_1_pkey;\n\n\tCREATE TABLE test_part_d_2 PARTITION OF test_part_d FOR VALUES IN (2,4);\n\tALTER TABLE test_part_d_2 ADD PRIMARY KEY (a);\n\tALTER TABLE test_part_d_2 REPLICA IDENTITY USING INDEX test_part_d_2_pkey;" + ) + node_publisher.safe_psql( + "CREATE PUBLICATION pub9 FOR TABLE test_part_d (a) WITH (publish_via_partition_root = true);" + ) + node_subscriber.safe_psql( + "DROP SUBSCRIPTION sub1;\n\tCREATE SUBSCRIPTION sub1 CONNECTION '" + + publisher_connstr + + "' PUBLICATION pub9" + ) + node_subscriber.wait_for_subscription_sync() + node_publisher.safe_psql("INSERT INTO test_part_d VALUES (3, 4);") + node_publisher.wait_for_catchup("sub1") + assert ( + node_subscriber.safe_psql("SELECT * FROM test_part_d ORDER BY a, b") == "1|\n3|" + ), "partitions with different replica identities not replicated correctly" + node_publisher.safe_psql( + "DROP TABLE tab1, tab2, tab3, tab4, tab5, tab6, tab7,\n\t\t\t test_part, test_part_a, test_part_b, test_part_c, test_part_d;" + ) + node_publisher.safe_psql( + "CREATE TABLE test_mix_2 (a int PRIMARY KEY, b int, c int);\n\tCREATE PUBLICATION 
pub_mix_3 FOR TABLE test_mix_2 (a, b, c);\n\tCREATE PUBLICATION pub_mix_4 FOR ALL TABLES;\n\n\t-- initial data\n\tINSERT INTO test_mix_2 VALUES (1, 2, 3);" + ) + node_subscriber.safe_psql( + "CREATE TABLE test_mix_2 (a int PRIMARY KEY, b int, c int);\n\tDROP SUBSCRIPTION sub1;\n\tCREATE SUBSCRIPTION sub1 CONNECTION '" + + publisher_connstr + + "' PUBLICATION pub_mix_3, pub_mix_4;" + ) + node_subscriber.wait_for_subscription_sync() + node_publisher.safe_psql("INSERT INTO test_mix_2 VALUES (4, 5, 6);") + node_publisher.wait_for_catchup("sub1") + assert ( + node_subscriber.safe_psql("SELECT * FROM test_mix_2") == "1|2|3\n4|5|6" + ), "all columns should be replicated" + node_subscriber.safe_psql( + "DROP SUBSCRIPTION sub1;\n\tCREATE TABLE test_mix_3 (a int PRIMARY KEY, b int, c int);" + ) + node_publisher.safe_psql( + "DROP TABLE test_mix_2;\n\tCREATE TABLE test_mix_3 (a int PRIMARY KEY, b int, c int);\n\tCREATE PUBLICATION pub_mix_5 FOR TABLE test_mix_3 (a, b, c);\n\tCREATE PUBLICATION pub_mix_6 FOR TABLES IN SCHEMA public;\n\n\t-- initial data\n\tINSERT INTO test_mix_3 VALUES (1, 2, 3);" + ) + node_subscriber.safe_psql( + "CREATE SUBSCRIPTION sub1 CONNECTION '" + + publisher_connstr + + "' PUBLICATION pub_mix_5, pub_mix_6;" + ) + node_subscriber.wait_for_subscription_sync() + node_publisher.safe_psql("INSERT INTO test_mix_3 VALUES (4, 5, 6);") + node_publisher.wait_for_catchup("sub1") + assert ( + node_subscriber.safe_psql("SELECT * FROM test_mix_3") == "1|2|3\n4|5|6" + ), "all columns should be replicated" + node_subscriber.safe_psql( + "DROP SUBSCRIPTION sub1;\n\n\tCREATE TABLE test_root (a int PRIMARY KEY, b int, c int) PARTITION BY RANGE (a);\n\tCREATE TABLE test_root_1 PARTITION OF test_root FOR VALUES FROM (1) TO (10);\n\tCREATE TABLE test_root_2 PARTITION OF test_root FOR VALUES FROM (10) TO (20);" + ) + node_publisher.safe_psql( + "CREATE TABLE test_root (a int PRIMARY KEY, b int, c int) PARTITION BY RANGE (a);\n\tCREATE TABLE test_root_1 PARTITION OF 
test_root FOR VALUES FROM (1) TO (10);\n\tCREATE TABLE test_root_2 PARTITION OF test_root FOR VALUES FROM (10) TO (20);\n\n\tCREATE PUBLICATION pub_test_root FOR TABLE test_root (a) WITH (publish_via_partition_root = true);\n\tCREATE PUBLICATION pub_test_root_1 FOR TABLE test_root_1 (a, b);\n\n\t-- initial data\n\tINSERT INTO test_root VALUES (1, 2, 3);\n\tINSERT INTO test_root VALUES (10, 20, 30);" + ) + node_subscriber.safe_psql( + "CREATE SUBSCRIPTION sub1 CONNECTION '" + + publisher_connstr + + "' PUBLICATION pub_test_root, pub_test_root_1;" + ) + node_subscriber.wait_for_subscription_sync() + node_publisher.safe_psql( + "INSERT INTO test_root VALUES (2, 3, 4);\n\tINSERT INTO test_root VALUES (11, 21, 31);" + ) + node_publisher.wait_for_catchup("sub1") + assert ( + node_subscriber.safe_psql("SELECT * FROM test_root ORDER BY a, b, c") + == "1||\n2||\n10||\n11||" + ), "publication via partition root applies column list" + node_publisher.safe_psql( + "DROP PUBLICATION pub1, pub2, pub3, pub4, pub5, pub6, pub7, pub8;\n\n\tCREATE SCHEMA s1;\n\tCREATE TABLE s1.t (a int, b int, c int) PARTITION BY RANGE (a);\n\tCREATE TABLE t_1 PARTITION OF s1.t FOR VALUES FROM (1) TO (10);\n\n\tCREATE PUBLICATION pub1 FOR TABLES IN SCHEMA s1;\n\tCREATE PUBLICATION pub2 FOR TABLE t_1(a, b, c);\n\n\t-- initial data\n\tINSERT INTO s1.t VALUES (1, 2, 3);" + ) + node_subscriber.safe_psql( + "CREATE SCHEMA s1;\n\tCREATE TABLE s1.t (a int, b int, c int) PARTITION BY RANGE (a);\n\tCREATE TABLE t_1 PARTITION OF s1.t FOR VALUES FROM (1) TO (10);\n\n\tDROP SUBSCRIPTION sub1;\n\tCREATE SUBSCRIPTION sub1 CONNECTION '" + + publisher_connstr + + "' PUBLICATION pub1, pub2;" + ) + node_subscriber.wait_for_subscription_sync() + node_publisher.safe_psql("INSERT INTO s1.t VALUES (4, 5, 6);") + node_publisher.wait_for_catchup("sub1") + assert ( + node_subscriber.safe_psql("SELECT * FROM s1.t ORDER BY a") == "1|2|3\n4|5|6" + ), "two publications, publishing the same relation" + node_subscriber.safe_psql( + 
"TRUNCATE s1.t;\n\n\tALTER SUBSCRIPTION sub1 SET PUBLICATION pub2, pub1;" + ) + node_subscriber.wait_for_subscription_sync() + node_publisher.safe_psql("INSERT INTO s1.t VALUES (7, 8, 9);") + node_publisher.wait_for_catchup("sub1") + assert ( + node_subscriber.safe_psql("SELECT * FROM s1.t ORDER BY a") == "7|8|9" + ), "two publications, publishing the same relation" + node_publisher.safe_psql( + "DROP SCHEMA s1 CASCADE;\n\tCREATE TABLE t (a int, b int, c int) PARTITION BY RANGE (a);\n\tCREATE TABLE t_1 PARTITION OF t FOR VALUES FROM (1) TO (10)\n\t\t PARTITION BY RANGE (a);\n\tCREATE TABLE t_2 PARTITION OF t_1 FOR VALUES FROM (1) TO (10);\n\n\tCREATE PUBLICATION pub3 FOR TABLE t_1 (a), t_2\n\t WITH (PUBLISH_VIA_PARTITION_ROOT);\n\n\t-- initial data\n\tINSERT INTO t VALUES (1, 2, 3);" + ) + node_subscriber.safe_psql( + "DROP SCHEMA s1 CASCADE;\n\tCREATE TABLE t (a int, b int, c int) PARTITION BY RANGE (a);\n\tCREATE TABLE t_1 PARTITION OF t FOR VALUES FROM (1) TO (10)\n\t\t PARTITION BY RANGE (a);\n\tCREATE TABLE t_2 PARTITION OF t_1 FOR VALUES FROM (1) TO (10);\n\n\tDROP SUBSCRIPTION sub1;\n\tCREATE SUBSCRIPTION sub1 CONNECTION '" + + publisher_connstr + + "' PUBLICATION pub3;" + ) + node_subscriber.wait_for_subscription_sync() + node_publisher.safe_psql("INSERT INTO t VALUES (4, 5, 6);") + node_publisher.wait_for_catchup("sub1") + assert ( + node_subscriber.safe_psql("SELECT * FROM t ORDER BY a, b, c") == "1||\n4||" + ), "publication containing both parent and child relation" + node_publisher.safe_psql( + "DROP TABLE t;\n\tCREATE TABLE t (a int, b int, c int) PARTITION BY RANGE (a);\n\tCREATE TABLE t_1 PARTITION OF t FOR VALUES FROM (1) TO (10)\n\t\t PARTITION BY RANGE (a);\n\tCREATE TABLE t_2 PARTITION OF t_1 FOR VALUES FROM (1) TO (10);\n\n\tCREATE PUBLICATION pub4 FOR TABLE t_1 (a), t_2 (b)\n\t WITH (PUBLISH_VIA_PARTITION_ROOT);\n\n\t-- initial data\n\tINSERT INTO t VALUES (1, 2, 3);" + ) + node_subscriber.safe_psql( + "DROP TABLE t;\n\tCREATE TABLE t (a int, b 
int, c int) PARTITION BY RANGE (a);\n\tCREATE TABLE t_1 PARTITION OF t FOR VALUES FROM (1) TO (10)\n\t\t PARTITION BY RANGE (a);\n\tCREATE TABLE t_2 PARTITION OF t_1 FOR VALUES FROM (1) TO (10);\n\n\tDROP SUBSCRIPTION sub1;\n\tCREATE SUBSCRIPTION sub1 CONNECTION '" + + publisher_connstr + + "' PUBLICATION pub4;" + ) + node_subscriber.wait_for_subscription_sync() + node_publisher.safe_psql("INSERT INTO t VALUES (4, 5, 6);") + node_publisher.wait_for_catchup("sub1") + assert ( + node_subscriber.safe_psql("SELECT * FROM t ORDER BY a, b, c") == "1||\n4||" + ), "publication containing both parent and child relation" + node_publisher.safe_psql( + "CREATE TABLE test_oldtuple_col (a int PRIMARY KEY, b int, c int);\n\tCREATE PUBLICATION pub_check_oldtuple FOR TABLE test_oldtuple_col (a, b);\n\tINSERT INTO test_oldtuple_col VALUES(1, 2, 3);\n\tSELECT * FROM pg_create_logical_replication_slot('test_slot', 'pgoutput');\n\tUPDATE test_oldtuple_col SET a = 2;\n\tDELETE FROM test_oldtuple_col;" + ) + result = node_publisher.safe_psql( + "SELECT substr(data, 7, 2) = int2send(2::smallint)\n\t\tFROM pg_logical_slot_peek_binary_changes('test_slot', NULL, NULL,\n\t\t\t'proto_version', '1',\n\t\t\t'publication_names', 'pub_check_oldtuple')\n\t\tWHERE get_byte(data, 0) = 85 OR get_byte(data, 0) = 68" + ) + assert result == "t\nt", "check the number of columns in the old tuple" + node_publisher.safe_psql( + "CREATE TABLE test_mix_4 (a int PRIMARY KEY, b int, c int, d int GENERATED ALWAYS AS (a + 1) STORED, e int GENERATED ALWAYS AS (a + 2) VIRTUAL);\n\tALTER TABLE test_mix_4 DROP COLUMN c;\n\n\tCREATE PUBLICATION pub_mix_7 FOR TABLE test_mix_4 (a, b);\n\tCREATE PUBLICATION pub_mix_8 FOR TABLE test_mix_4;\n\n\t-- initial data\n\tINSERT INTO test_mix_4 VALUES (1, 2);" + ) + node_subscriber.safe_psql( + "DROP SUBSCRIPTION sub1;\n\tCREATE TABLE test_mix_4 (a int PRIMARY KEY, b int, c int, d int);" + ) + node_subscriber.safe_psql( + "CREATE SUBSCRIPTION sub1 CONNECTION '" + + 
publisher_connstr + + "' PUBLICATION pub_mix_7, pub_mix_8;" + ) + node_subscriber.wait_for_subscription_sync() + assert ( + node_subscriber.safe_psql("SELECT * FROM test_mix_4 ORDER BY a") == "1|2||" + ), "initial synchronization with multiple publications with the same column list" + node_publisher.safe_psql("INSERT INTO test_mix_4 VALUES (3, 4);") + node_publisher.wait_for_catchup("sub1") + assert ( + node_subscriber.safe_psql("SELECT * FROM test_mix_4 ORDER BY a") + == "1|2||\n3|4||" + ), "replication with multiple publications with the same column list" + node_publisher.safe_psql( + "CREATE TABLE test_mix_1 (a int PRIMARY KEY, b int, c int);\n\tCREATE PUBLICATION pub_mix_1 FOR TABLE test_mix_1 (a, b);\n\tCREATE PUBLICATION pub_mix_2 FOR TABLE test_mix_1 (a, c);" + ) + node_subscriber.safe_psql( + "DROP SUBSCRIPTION sub1;\n\tCREATE TABLE test_mix_1 (a int PRIMARY KEY, b int, c int);" + ) + result = node_subscriber.psql_capture( + "CREATE SUBSCRIPTION sub1 CONNECTION '" + + publisher_connstr + + "' PUBLICATION pub_mix_1, pub_mix_2;" + ) + assert re.search( + r'cannot use different column lists for table "public.test_mix_1" in different publications', + result.stderr, + ), "different column lists detected" + node_publisher.safe_psql("ALTER PUBLICATION pub_mix_1 SET TABLE test_mix_1 (a, c);") + node_subscriber.safe_psql( + "CREATE SUBSCRIPTION sub1 CONNECTION '" + + publisher_connstr + + "' PUBLICATION pub_mix_1, pub_mix_2;" + ) + node_publisher.wait_for_catchup("sub1") + node_publisher.safe_psql( + "ALTER PUBLICATION pub_mix_1 SET TABLE test_mix_1 (a, b);\n\tINSERT INTO test_mix_1 VALUES(1, 1, 1);" + ) + node_publisher.wait_for_log( + r'cannot use different column lists for table "public.test_mix_1" in different publications' + ) + node_subscriber.stop("fast") + node_publisher.stop("fast") diff --git a/src/test/subscription/pyt/test_032_subscribe_use_index.py b/src/test/subscription/pyt/test_032_subscribe_use_index.py new file mode 100644 index 
0000000000000..478e085052e03 --- /dev/null +++ b/src/test/subscription/pyt/test_032_subscribe_use_index.py @@ -0,0 +1,302 @@ +# Copyright (c) 2022-2026, PostgreSQL Global Development Group + +"""Port of src/test/subscription/t/032_subscribe_use_index.pl. + +Logical replication apply uses an available index on the subscriber (REPLICA +IDENTITY FULL), across multi-column, partitioned, expression, unique and hash +index cases. +""" + +_APP = "tap_sub" + + +def _pubsub(publisher, subscriber, connstr, table): + publisher.safe_psql( + "CREATE PUBLICATION tap_pub_rep_full FOR TABLE {}".format(table) + ) + subscriber.safe_psql( + "CREATE SUBSCRIPTION tap_sub_rep_full CONNECTION " + "'{} application_name={}' PUBLICATION tap_pub_rep_full".format(connstr, _APP) + ) + subscriber.wait_for_subscription_sync(publisher, _APP) + + +def _drop_pubsub(publisher, subscriber, table): + publisher.safe_psql("DROP PUBLICATION tap_pub_rep_full") + publisher.safe_psql("DROP TABLE {}".format(table)) + subscriber.safe_psql("DROP SUBSCRIPTION tap_sub_rep_full") + subscriber.safe_psql("DROP TABLE {}".format(table)) + + +def _idx_scan(indexrelname, value): + return "select {} from pg_stat_all_indexes " "where indexrelname = '{}';".format( + value, indexrelname + ) + + +def _multi_column(publisher, subscriber, connstr): + publisher.safe_psql("CREATE TABLE test_replica_id_full (x int, y text)") + publisher.safe_psql("ALTER TABLE test_replica_id_full REPLICA IDENTITY FULL") + subscriber.safe_psql("CREATE TABLE test_replica_id_full (x int, y text)") + subscriber.safe_psql( + "CREATE INDEX test_replica_id_full_idx ON test_replica_id_full(x,y)" + ) + publisher.safe_psql( + "INSERT INTO test_replica_id_full SELECT (i%10), (i%10)::text " + "FROM generate_series(0,10) i" + ) + _pubsub(publisher, subscriber, connstr, "test_replica_id_full") + publisher.safe_psql("DELETE FROM test_replica_id_full WHERE x IN (5, 6)") + publisher.safe_psql( + "UPDATE test_replica_id_full SET x = 100, y = '200' WHERE x IN (1, 
2)" + ) + publisher.wait_for_catchup(_APP) + assert subscriber.poll_query_until( + _idx_scan("test_replica_id_full_idx", "(idx_scan = 4)") + ), "4 rows updated via index" + assert ( + subscriber.safe_psql( + "select count(*) from test_replica_id_full WHERE (x = 100 and y = '200')" + ) + == "2" + ), "correct data after UPDATE" + assert ( + subscriber.safe_psql( + "select count(*) from test_replica_id_full where x in (5, 6)" + ) + == "0" + ), "correct data after DELETE" + _drop_pubsub(publisher, subscriber, "test_replica_id_full") + + +def _partitioned(publisher, subscriber, connstr): + part_ddl = ( + "CREATE TABLE users_table_part(user_id bigint, value_1 int, value_2 int) " + "PARTITION BY RANGE (value_1);\n" + "CREATE TABLE users_table_part_0 PARTITION OF users_table_part " + "FOR VALUES FROM (0) TO (10);\n" + "CREATE TABLE users_table_part_1 PARTITION OF users_table_part " + "FOR VALUES FROM (10) TO (20);" + ) + publisher.safe_psql(part_ddl) + for tab in ("users_table_part", "users_table_part_0", "users_table_part_1"): + publisher.safe_psql("ALTER TABLE {} REPLICA IDENTITY FULL".format(tab)) + subscriber.safe_psql(part_ddl) + subscriber.safe_psql( + "CREATE INDEX users_table_part_idx ON users_table_part(user_id, value_1)" + ) + publisher.safe_psql( + "INSERT INTO users_table_part SELECT (i%100), (i%20), i " + "FROM generate_series(0,100) i" + ) + _pubsub(publisher, subscriber, connstr, "users_table_part") + publisher.safe_psql("UPDATE users_table_part SET value_1 = 0 WHERE user_id = 4") + publisher.safe_psql( + "DELETE FROM users_table_part WHERE user_id = 1 and value_1 = 1" + ) + publisher.safe_psql( + "DELETE FROM users_table_part WHERE user_id = 12 and value_1 = 12" + ) + publisher.wait_for_catchup(_APP) + assert subscriber.poll_query_until( + "select sum(idx_scan)=3 from pg_stat_all_indexes " + "where indexrelname ilike 'users_table_part_%';" + ), "partitioned table updates via index" + assert ( + subscriber.safe_psql( + "select sum(user_id+value_1+value_2) 
from users_table_part" + ) + == "10907" + ), "correct data" + assert ( + subscriber.safe_psql( + "select count(DISTINCT(user_id,value_1, value_2)) from users_table_part" + ) + == "99" + ), "correct data" + _drop_pubsub(publisher, subscriber, "users_table_part") + + +def _expr_or_partial(publisher, subscriber, connstr): + indexes = ( + "select sum(idx_scan) from pg_stat_all_indexes where indexrelname IN " + "('people_names_expr_only', 'people_names_partial')" + ) + publisher.safe_psql("CREATE TABLE people (firstname text, lastname text)") + publisher.safe_psql("ALTER TABLE people REPLICA IDENTITY FULL") + subscriber.safe_psql("CREATE TABLE people (firstname text, lastname text)") + subscriber.safe_psql( + "CREATE INDEX people_names_expr_only ON people " + "((firstname || ' ' || lastname))" + ) + subscriber.safe_psql( + "CREATE INDEX people_names_partial ON people(firstname) " + "WHERE (firstname = 'first_name_1')" + ) + publisher.safe_psql( + "INSERT INTO people SELECT 'first_name_' || i::text, " + "'last_name_' || i::text FROM generate_series(0,200) i" + ) + _pubsub(publisher, subscriber, connstr, "people") + publisher.safe_psql( + "UPDATE people SET firstname = 'no-name' WHERE firstname = 'first_name_1'" + ) + publisher.safe_psql( + "UPDATE people SET firstname = 'no-name' WHERE firstname = 'first_name_2' " + "AND lastname = 'last_name_2'" + ) + publisher.wait_for_catchup(_APP) + assert subscriber.safe_psql(indexes) == "0", "expression/partial index not used" + publisher.safe_psql("DELETE FROM people WHERE firstname = 'first_name_3'") + publisher.safe_psql( + "DELETE FROM people WHERE firstname = 'first_name_4' " + "AND lastname = 'last_name_4'" + ) + publisher.wait_for_catchup(_APP) + assert subscriber.safe_psql(indexes) == "0", "expression/partial index not used" + assert subscriber.safe_psql("SELECT count(*) FROM people") == "199", "correct data" + _drop_pubsub(publisher, subscriber, "people") + + +def _expr_and_columns(publisher, subscriber, connstr): + 
publisher.safe_psql("CREATE TABLE people (firstname text, lastname text)") + publisher.safe_psql("ALTER TABLE people REPLICA IDENTITY FULL") + subscriber.safe_psql("CREATE TABLE people (firstname text, lastname text)") + subscriber.safe_psql( + "CREATE INDEX people_names ON people " + "(firstname, lastname, (firstname || ' ' || lastname))" + ) + publisher.safe_psql( + "INSERT INTO people SELECT 'first_name_' || i::text, " + "'last_name_' || i::text FROM generate_series(0, 20) i" + ) + _pubsub(publisher, subscriber, connstr, "people") + publisher.safe_psql( + "UPDATE people SET firstname = 'no-name' WHERE firstname = 'first_name_1'" + ) + publisher.safe_psql("DELETE FROM people WHERE firstname = 'no-name'") + publisher.wait_for_catchup(_APP) + assert subscriber.poll_query_until( + _idx_scan("people_names", "idx_scan=2") + ), "two rows deleted via expression+columns index" + assert subscriber.safe_psql("SELECT count(*) FROM people") == "20", "correct data" + assert ( + subscriber.safe_psql("SELECT count(*) FROM people WHERE firstname = 'no-name'") + == "0" + ), "correct data" + subscriber.safe_psql("DROP INDEX people_names") + publisher.safe_psql("DELETE FROM people WHERE lastname = 'last_name_18'") + publisher.wait_for_catchup(_APP) + assert ( + subscriber.safe_psql( + "SELECT count(*) FROM people WHERE lastname = 'last_name_18'" + ) + == "0" + ), "correct data via sequential scan" + _drop_pubsub(publisher, subscriber, "people") + + +def _null_and_missing(publisher, subscriber, connstr): + publisher.safe_psql("CREATE TABLE test_replica_id_full (x int)") + publisher.safe_psql("ALTER TABLE test_replica_id_full REPLICA IDENTITY FULL") + subscriber.safe_psql("CREATE TABLE test_replica_id_full (x int, y int)") + subscriber.safe_psql( + "CREATE INDEX test_replica_id_full_idx ON test_replica_id_full(x,y)" + ) + _pubsub(publisher, subscriber, connstr, "test_replica_id_full") + publisher.safe_psql("INSERT INTO test_replica_id_full VALUES (1), (2), (3)") + 
publisher.safe_psql("UPDATE test_replica_id_full SET x = x + 1 WHERE x = 1") + publisher.wait_for_catchup(_APP) + assert subscriber.poll_query_until( + _idx_scan("test_replica_id_full_idx", "idx_scan=1") + ), "index used even with NULL values" + assert ( + subscriber.safe_psql("select sum(x) from test_replica_id_full WHERE y IS NULL") + == "7" + ), "correct data" + assert ( + subscriber.safe_psql( + "select count(*) from test_replica_id_full WHERE y IS NULL" + ) + == "3" + ), "correct data" + _drop_pubsub(publisher, subscriber, "test_replica_id_full") + + +def _unique_index(publisher, subscriber, connstr): + publisher.safe_psql("CREATE TABLE test_replica_id_full (x int, y int)") + publisher.safe_psql("ALTER TABLE test_replica_id_full REPLICA IDENTITY FULL") + subscriber.safe_psql("CREATE TABLE test_replica_id_full (x int, y int)") + subscriber.safe_psql( + "CREATE UNIQUE INDEX test_replica_id_full_idxy ON test_replica_id_full(x,y)" + ) + publisher.safe_psql( + "INSERT INTO test_replica_id_full SELECT i, i FROM generate_series(0,21) i" + ) + _pubsub(publisher, subscriber, connstr, "test_replica_id_full") + subscriber.safe_psql( + "INSERT INTO test_replica_id_full SELECT i+100, i FROM generate_series(0,21) i" + ) + publisher.safe_psql("UPDATE test_replica_id_full SET x = 2000 WHERE y = 15") + publisher.wait_for_catchup(_APP) + assert subscriber.poll_query_until( + _idx_scan("test_replica_id_full_idxy", "(idx_scan = 1)") + ), "one row updated via unique index" + assert ( + subscriber.safe_psql("SELECT count(*) FROM test_replica_id_full WHERE x = 2000") + == "1" + ), "only one row updated" + _drop_pubsub(publisher, subscriber, "test_replica_id_full") + + +def _hash_index(publisher, subscriber, connstr): + publisher.safe_psql("CREATE TABLE test_replica_id_full (x int, y text)") + publisher.safe_psql("ALTER TABLE test_replica_id_full REPLICA IDENTITY FULL") + subscriber.safe_psql("CREATE TABLE test_replica_id_full (x int, y text)") + subscriber.safe_psql( + "CREATE INDEX 
test_replica_id_full_idx ON test_replica_id_full USING HASH (x)" + ) + publisher.safe_psql( + "INSERT INTO test_replica_id_full SELECT i, (i%10)::text " + "FROM generate_series(0,10) i" + ) + _pubsub(publisher, subscriber, connstr, "test_replica_id_full") + publisher.safe_psql("DELETE FROM test_replica_id_full WHERE x IN (5, 6)") + publisher.safe_psql( + "UPDATE test_replica_id_full SET x = 100, y = '200' WHERE x IN (1, 2)" + ) + publisher.wait_for_catchup(_APP) + assert subscriber.poll_query_until( + _idx_scan("test_replica_id_full_idx", "(idx_scan = 4)") + ), "rows deleted/updated via hash index" + assert ( + subscriber.safe_psql( + "select count(*) from test_replica_id_full WHERE (x = 100 and y = '200')" + ) + == "2" + ), "correct data after UPDATE" + assert ( + subscriber.safe_psql( + "select count(*) from test_replica_id_full where x in (5, 6)" + ) + == "0" + ), "correct data after DELETE" + _drop_pubsub(publisher, subscriber, "test_replica_id_full") + + +def test_subscribe_use_index(create_pg): + """Apply uses subscriber indexes across many index kinds and table shapes.""" + publisher = create_pg("publisher", allows_streaming="logical") + subscriber = create_pg("subscriber") + connstr = publisher.connstr() + " dbname=postgres" + + _multi_column(publisher, subscriber, connstr) + _partitioned(publisher, subscriber, connstr) + _expr_or_partial(publisher, subscriber, connstr) + _expr_and_columns(publisher, subscriber, connstr) + _null_and_missing(publisher, subscriber, connstr) + _unique_index(publisher, subscriber, connstr) + _hash_index(publisher, subscriber, connstr) + + subscriber.stop("fast") + publisher.stop("fast") diff --git a/src/test/subscription/pyt/test_033_run_as_table_owner.py b/src/test/subscription/pyt/test_033_run_as_table_owner.py new file mode 100644 index 0000000000000..fb870c7b638a2 --- /dev/null +++ b/src/test/subscription/pyt/test_033_run_as_table_owner.py @@ -0,0 +1,189 @@ +# Copyright (c) 2021-2026, PostgreSQL Global Development Group + 
+"""Port of src/test/subscription/t/033_run_as_table_owner.pl. + +Logical replication respects permissions (run_as_owner and role privileges). +""" + +_TBL = "alice.unpartitioned" +_PERM_DENIED = r"ERROR: ( [A-Z0-9]+:)? permission denied for table unpartitioned" + + +def _publish_insert(publisher, new_i): + publisher.safe_psql( + "SET SESSION AUTHORIZATION regress_alice;\n" + "INSERT INTO {} (i) VALUES ({});".format(_TBL, new_i) + ) + + +def _publish_update(publisher, old_i, new_i): + publisher.safe_psql( + "SET SESSION AUTHORIZATION regress_alice;\n" + "UPDATE {} SET i = {} WHERE i = {};".format(_TBL, new_i, old_i) + ) + + +def _publish_delete(publisher, old_i): + publisher.safe_psql( + "SET SESSION AUTHORIZATION regress_alice;\n" + "DELETE FROM {} WHERE i = {};".format(_TBL, old_i) + ) + + +def _agg(subscriber): + return subscriber.safe_psql("SELECT COUNT(i), MIN(i), MAX(i) FROM {}".format(_TBL)) + + +def _expect_replication(publisher, subscriber, cnt, mn, mx, name): + publisher.wait_for_catchup("admin_sub") + assert _agg(subscriber) == "{}|{}|{}".format(cnt, mn, mx), name + + +def _expect_failure(subscriber, offset, cnt, mn, mx, name): + offset = subscriber.wait_for_log(_PERM_DENIED, offset) + assert _agg(subscriber) == "{}|{}|{}".format(cnt, mn, mx), name + return offset + + +def _setup(publisher, subscriber, connstr): + for node in (publisher, subscriber): + node.safe_psql( + "CREATE ROLE regress_admin SUPERUSER LOGIN;\n" + "CREATE ROLE regress_admin2 SUPERUSER LOGIN;\n" + "CREATE ROLE regress_alice NOSUPERUSER LOGIN;\n" + "GRANT CREATE ON DATABASE postgres TO regress_alice;\n" + "SET SESSION AUTHORIZATION regress_alice;\n" + "CREATE SCHEMA alice;\n" + "GRANT USAGE ON SCHEMA alice TO regress_admin;\n" + "CREATE TABLE alice.unpartitioned (i INTEGER);\n" + "ALTER TABLE alice.unpartitioned REPLICA IDENTITY FULL;\n" + "GRANT SELECT ON TABLE alice.unpartitioned TO regress_admin;" + ) + publisher.safe_psql( + "SET SESSION AUTHORIZATION regress_alice;\n" + "CREATE 
PUBLICATION alice FOR TABLE alice.unpartitioned " + "WITH (publish_via_partition_root = true);" + ) + subscriber.safe_psql( + "SET SESSION AUTHORIZATION regress_admin;\n" + "CREATE SUBSCRIPTION admin_sub CONNECTION '{}' PUBLICATION alice " + "WITH (run_as_owner = true, password_required = false);".format(connstr) + ) + subscriber.wait_for_subscription_sync(publisher, "admin_sub") + + +def test_run_as_table_owner(create_pg): + """run_as_owner and table/role privileges gate logical replication.""" + publisher = create_pg("publisher", allows_streaming="logical") + subscriber = create_pg("subscriber") + connstr = publisher.connstr() + " dbname=postgres" + _setup(publisher, subscriber, connstr) + offset = 0 + + # Superuser owner can replicate. + _publish_insert(publisher, 1) + _publish_insert(publisher, 3) + _publish_insert(publisher, 5) + _publish_update(publisher, 1, 7) + _publish_delete(publisher, 3) + _expect_replication(publisher, subscriber, 2, 5, 7, "superuser can replicate") + + # No privileges: replication fails. + subscriber.safe_psql("ALTER ROLE regress_admin NOSUPERUSER") + _publish_insert(publisher, 9) + offset = _expect_failure( + subscriber, offset, 2, 5, 7, "with no privileges cannot replicate" + ) + + # INSERT privilege (but not SELECT) lets INSERT replicate. + subscriber.safe_psql( + "ALTER ROLE regress_admin NOSUPERUSER;\n" + "SET SESSION AUTHORIZATION regress_alice;\n" + "GRANT INSERT,UPDATE,DELETE ON alice.unpartitioned TO regress_admin;\n" + "REVOKE SELECT ON alice.unpartitioned FROM regress_admin;" + ) + _expect_replication( + publisher, subscriber, 3, 5, 9, "with INSERT privilege can replicate INSERT" + ) + + # No SELECT: UPDATE/DELETE cannot replicate. + _publish_update(publisher, 5, 11) + _publish_delete(publisher, 9) + offset = _expect_failure( + subscriber, + offset, + 3, + 5, + 9, + "without SELECT privilege cannot replicate UPDATE or DELETE", + ) + + # Grant SELECT: replication resumes. 
+ subscriber.safe_psql( + "SET SESSION AUTHORIZATION regress_alice;\n" + "GRANT SELECT ON alice.unpartitioned TO regress_admin;" + ) + _expect_replication( + publisher, subscriber, 2, 7, 11, "with all privileges can replicate" + ) + + # SET ROLE without INHERIT does not grant table privileges here. + subscriber.safe_psql( + "SET SESSION AUTHORIZATION regress_alice;\n" + "REVOKE ALL PRIVILEGES ON alice.unpartitioned FROM regress_admin;\n" + "RESET SESSION AUTHORIZATION;\n" + "GRANT regress_alice TO regress_admin WITH INHERIT FALSE, SET TRUE;" + ) + _publish_insert(publisher, 13) + offset = _expect_failure( + subscriber, offset, 2, 7, 11, "with SET ROLE but not INHERIT cannot replicate" + ) + + # INHERIT without SET ROLE works. + subscriber.safe_psql( + "GRANT regress_alice TO regress_admin WITH INHERIT TRUE, SET FALSE;" + ) + _expect_replication( + publisher, subscriber, 3, 7, 13, "with INHERIT but not SET ROLE can replicate" + ) + + # Back to SET ROLE only: fails again. + subscriber.safe_psql( + "SET SESSION AUTHORIZATION regress_alice;\n" + "REVOKE ALL PRIVILEGES ON alice.unpartitioned FROM regress_admin;\n" + "RESET SESSION AUTHORIZATION;\n" + "GRANT regress_alice TO regress_admin WITH INHERIT FALSE, SET TRUE;" + ) + _publish_insert(publisher, 14) + offset = _expect_failure( + subscriber, offset, 3, 7, 13, "with no privileges cannot replicate" + ) + + # run_as_owner = false: replication runs as the table owner. + subscriber.safe_psql("ALTER SUBSCRIPTION admin_sub SET (run_as_owner = false);") + _expect_replication( + publisher, + subscriber, + 4, + 7, + 14, + "can replicate after setting run_as_owner false", + ) + + # Initial data sync as table owner (new subscription owned by admin2). 
+ subscriber.safe_psql("DROP SUBSCRIPTION admin_sub;\nTRUNCATE alice.unpartitioned;") + subscriber.safe_psql( + "SET SESSION AUTHORIZATION regress_admin2;\n" + "CREATE SUBSCRIPTION admin_sub CONNECTION '{}' PUBLICATION alice " + "WITH (run_as_owner = false, password_required = false, " + "copy_data = true, enabled = false);".format(connstr) + ) + subscriber.safe_psql("ALTER ROLE regress_admin2 NOSUPERUSER") + subscriber.safe_psql( + "GRANT regress_alice TO regress_admin2 WITH INHERIT FALSE, SET TRUE;\n" + "ALTER SUBSCRIPTION admin_sub ENABLE;" + ) + subscriber.wait_for_subscription_sync(publisher, "admin_sub") + _expect_replication( + publisher, subscriber, 4, 7, 14, "table owner can do the initial data copy" + ) diff --git a/src/test/subscription/pyt/test_034_temporal.py b/src/test/subscription/pyt/test_034_temporal.py new file mode 100644 index 0000000000000..885f7ee17196d --- /dev/null +++ b/src/test/subscription/pyt/test_034_temporal.py @@ -0,0 +1,466 @@ +# Copyright (c) 2022-2026, PostgreSQL Global Development Group + +# pylint: disable=line-too-long,too-many-statements +"""Port of src/test/subscription/t/034_temporal.pl. + +Logical replication of temporal tables (WITHOUT OVERLAPS primary/unique keys +over int4range + daterange columns): initial sync and incremental changes, and +the replica-identity error messages for UPDATE/DELETE on tables lacking a +usable replica identity. Generated from the Perl original via +.agent/gen_golden.py with the two helper subroutines inlined as nested +functions. 
+""" + + +def test_034_temporal(create_pg): + """Logical replication of temporal tables and replica-identity errors.""" + node_publisher = create_pg("publisher", allows_streaming="logical", start=False) + node_publisher.start() + node_subscriber = create_pg("subscriber", start=False) + node_subscriber.start() + publisher_connstr = node_publisher.connstr() + " dbname=postgres" + + def create_tables(): + for node in (node_publisher, node_subscriber): + node.safe_psql( + "CREATE TABLE temporal_no_key (id int4range, valid_at daterange, a text)" + ) + node.safe_psql( + "CREATE TABLE temporal_pk (id int4range, valid_at daterange, a text, PRIMARY KEY (id, valid_at WITHOUT OVERLAPS))" + ) + node.safe_psql( + "CREATE TABLE temporal_unique (id int4range, valid_at daterange, a text, UNIQUE (id, valid_at WITHOUT OVERLAPS))" + ) + + def drop_everything(): + node_publisher.safe_psql("DROP TABLE IF EXISTS temporal_no_key") + node_publisher.safe_psql("DROP TABLE IF EXISTS temporal_pk") + node_publisher.safe_psql("DROP TABLE IF EXISTS temporal_unique") + node_publisher.safe_psql("DROP PUBLICATION pub1") + node_subscriber.safe_psql("DROP TABLE IF EXISTS temporal_no_key") + node_subscriber.safe_psql("DROP TABLE IF EXISTS temporal_pk") + node_subscriber.safe_psql("DROP TABLE IF EXISTS temporal_unique") + node_subscriber.safe_psql("DROP SUBSCRIPTION sub1") + + create_tables() + node_publisher.safe_psql( + "INSERT INTO temporal_no_key (id, valid_at, a)\n VALUES ('[1,2)', '[2000-01-01,2010-01-01)', 'a')" + ) + node_publisher.safe_psql( + "INSERT INTO temporal_pk (id, valid_at, a)\n VALUES ('[1,2)', '[2000-01-01,2010-01-01)', 'a')" + ) + node_publisher.safe_psql( + "INSERT INTO temporal_unique (id, valid_at, a)\n VALUES ('[1,2)', '[2000-01-01,2010-01-01)', 'a')" + ) + node_publisher.safe_psql("CREATE PUBLICATION pub1 FOR ALL TABLES") + node_subscriber.safe_psql( + "CREATE SUBSCRIPTION sub1 CONNECTION '" + + publisher_connstr + + "' PUBLICATION pub1" + ) + 
node_subscriber.wait_for_subscription_sync() + result = node_subscriber.safe_psql( + "SELECT * FROM temporal_no_key ORDER BY id, valid_at" + ) + assert result == "[1,2)|[2000-01-01,2010-01-01)|a", "synced temporal_no_key DEFAULT" + result = node_subscriber.safe_psql( + "SELECT * FROM temporal_pk ORDER BY id, valid_at" + ) + assert result == "[1,2)|[2000-01-01,2010-01-01)|a", "synced temporal_pk DEFAULT" + result = node_subscriber.safe_psql( + "SELECT * FROM temporal_unique ORDER BY id, valid_at" + ) + assert result == "[1,2)|[2000-01-01,2010-01-01)|a", "synced temporal_unique DEFAULT" + node_publisher.safe_psql( + "INSERT INTO temporal_no_key (id, valid_at, a)\n VALUES ('[2,3)', '[2000-01-01,2010-01-01)', 'a'),\n ('[3,4)', '[2000-01-01,2010-01-01)', 'a'),\n ('[4,5)', '[2000-01-01,2010-01-01)', 'a')" + ) + result = node_publisher.psql_capture( + "UPDATE temporal_no_key SET a = 'b' WHERE id = '[2,3)'" + ) + assert ( + result.stderr + == 'psql::1: ERROR: cannot update table "temporal_no_key" because it does not have a replica identity and publishes updates\nHINT: To enable updating the table, set REPLICA IDENTITY using ALTER TABLE.' + ), "can't UPDATE temporal_no_key DEFAULT" + result = node_publisher.psql_capture( + "DELETE FROM temporal_no_key WHERE id = '[3,4)'" + ) + assert ( + result.stderr + == 'psql::1: ERROR: cannot delete from table "temporal_no_key" because it does not have a replica identity and publishes deletes\nHINT: To enable deleting from the table, set REPLICA IDENTITY using ALTER TABLE.' + ), "can't DELETE temporal_no_key DEFAULT" + result = node_publisher.psql_capture( + "DELETE FROM temporal_no_key FOR PORTION OF valid_at FROM '2002-01-01' TO '2003-01-01' WHERE id = '[2,3)'" + ) + assert ( + result.stderr + == 'psql::1: ERROR: cannot delete from table "temporal_no_key" because it does not have a replica identity and publishes deletes\nHINT: To enable deleting from the table, set REPLICA IDENTITY using ALTER TABLE.' 
+ ), "can't DELETE FOR PORTION OF temporal_no_key DEFAULT" + node_publisher.wait_for_catchup("sub1") + result = node_subscriber.safe_psql( + "SELECT * FROM temporal_no_key ORDER BY id, valid_at" + ) + assert ( + result + == "[1,2)|[2000-01-01,2010-01-01)|a\n[2,3)|[2000-01-01,2010-01-01)|a\n[3,4)|[2000-01-01,2010-01-01)|a\n[4,5)|[2000-01-01,2010-01-01)|a" + ), "replicated temporal_no_key DEFAULT" + node_publisher.safe_psql( + "INSERT INTO temporal_pk (id, valid_at, a)\n VALUES ('[2,3)', '[2000-01-01,2010-01-01)', 'a'),\n ('[3,4)', '[2000-01-01,2010-01-01)', 'a'),\n ('[4,5)', '[2000-01-01,2010-01-01)', 'a')" + ) + node_publisher.safe_psql("UPDATE temporal_pk SET a = 'b' WHERE id = '[2,3)'") + node_publisher.safe_psql( + "UPDATE temporal_pk FOR PORTION OF valid_at FROM '2001-01-01' TO '2002-01-01' SET a = 'c' WHERE id = '[2,3)'" + ) + node_publisher.safe_psql("DELETE FROM temporal_pk WHERE id = '[3,4)'") + node_publisher.safe_psql( + "DELETE FROM temporal_pk FOR PORTION OF valid_at FROM '2002-01-01' TO '2003-01-01' WHERE id = '[2,3)'" + ) + node_publisher.wait_for_catchup("sub1") + result = node_subscriber.safe_psql( + "SELECT * FROM temporal_pk ORDER BY id, valid_at" + ) + assert ( + result + == "[1,2)|[2000-01-01,2010-01-01)|a\n[2,3)|[2000-01-01,2001-01-01)|b\n[2,3)|[2001-01-01,2002-01-01)|c\n[2,3)|[2003-01-01,2010-01-01)|b\n[4,5)|[2000-01-01,2010-01-01)|a" + ), "replicated temporal_pk DEFAULT" + node_publisher.safe_psql( + "INSERT INTO temporal_unique (id, valid_at, a)\n VALUES ('[2,3)', '[2000-01-01,2010-01-01)', 'a'),\n ('[3,4)', '[2000-01-01,2010-01-01)', 'a'),\n ('[4,5)', '[2000-01-01,2010-01-01)', 'a')" + ) + result = node_publisher.psql_capture( + "UPDATE temporal_unique SET a = 'b' WHERE id = '[2,3)'" + ) + assert ( + result.stderr + == 'psql::1: ERROR: cannot update table "temporal_unique" because it does not have a replica identity and publishes updates\nHINT: To enable updating the table, set REPLICA IDENTITY using ALTER TABLE.' 
+ ), "can't UPDATE temporal_unique DEFAULT" + result = node_publisher.psql_capture( + "DELETE FROM temporal_unique WHERE id = '[3,4)'" + ) + assert ( + result.stderr + == 'psql::1: ERROR: cannot delete from table "temporal_unique" because it does not have a replica identity and publishes deletes\nHINT: To enable deleting from the table, set REPLICA IDENTITY using ALTER TABLE.' + ), "can't DELETE temporal_unique DEFAULT" + result = node_publisher.psql_capture( + "DELETE FROM temporal_unique FOR PORTION OF valid_at FROM '2002-01-01' TO '2003-01-01' WHERE id = '[2,3)'" + ) + assert ( + result.stderr + == 'psql::1: ERROR: cannot delete from table "temporal_unique" because it does not have a replica identity and publishes deletes\nHINT: To enable deleting from the table, set REPLICA IDENTITY using ALTER TABLE.' + ), "can't DELETE FOR PORTION OF temporal_unique DEFAULT" + node_publisher.wait_for_catchup("sub1") + result = node_subscriber.safe_psql( + "SELECT * FROM temporal_unique ORDER BY id, valid_at" + ) + assert ( + result + == "[1,2)|[2000-01-01,2010-01-01)|a\n[2,3)|[2000-01-01,2010-01-01)|a\n[3,4)|[2000-01-01,2010-01-01)|a\n[4,5)|[2000-01-01,2010-01-01)|a" + ), "replicated temporal_unique DEFAULT" + drop_everything() + create_tables() + node_publisher.safe_psql("ALTER TABLE temporal_no_key REPLICA IDENTITY FULL") + node_publisher.safe_psql("ALTER TABLE temporal_pk REPLICA IDENTITY FULL") + node_publisher.safe_psql("ALTER TABLE temporal_unique REPLICA IDENTITY FULL") + node_subscriber.safe_psql("ALTER TABLE temporal_no_key REPLICA IDENTITY FULL") + node_subscriber.safe_psql("ALTER TABLE temporal_pk REPLICA IDENTITY FULL") + node_subscriber.safe_psql("ALTER TABLE temporal_unique REPLICA IDENTITY FULL") + node_publisher.safe_psql( + "INSERT INTO temporal_no_key (id, valid_at, a)\n VALUES ('[1,2)', '[2000-01-01,2010-01-01)', 'a')" + ) + node_publisher.safe_psql( + "INSERT INTO temporal_pk (id, valid_at, a)\n VALUES ('[1,2)', '[2000-01-01,2010-01-01)', 'a')" + ) + 
node_publisher.safe_psql( + "INSERT INTO temporal_unique (id, valid_at, a)\n VALUES ('[1,2)', '[2000-01-01,2010-01-01)', 'a')" + ) + node_publisher.safe_psql("CREATE PUBLICATION pub1 FOR ALL TABLES") + node_subscriber.safe_psql( + "CREATE SUBSCRIPTION sub1 CONNECTION '" + + publisher_connstr + + "' PUBLICATION pub1" + ) + node_subscriber.wait_for_subscription_sync() + result = node_subscriber.safe_psql( + "SELECT * FROM temporal_no_key ORDER BY id, valid_at" + ) + assert result == "[1,2)|[2000-01-01,2010-01-01)|a", "synced temporal_no_key FULL" + result = node_subscriber.safe_psql( + "SELECT * FROM temporal_pk ORDER BY id, valid_at" + ) + assert result == "[1,2)|[2000-01-01,2010-01-01)|a", "synced temporal_pk FULL" + result = node_subscriber.safe_psql( + "SELECT * FROM temporal_unique ORDER BY id, valid_at" + ) + assert result == "[1,2)|[2000-01-01,2010-01-01)|a", "synced temporal_unique FULL" + node_publisher.safe_psql( + "INSERT INTO temporal_no_key (id, valid_at, a)\n VALUES ('[2,3)', '[2000-01-01,2010-01-01)', 'a'),\n ('[3,4)', '[2000-01-01,2010-01-01)', 'a'),\n ('[4,5)', '[2000-01-01,2010-01-01)', 'a')" + ) + node_publisher.safe_psql("UPDATE temporal_no_key SET a = 'b' WHERE id = '[2,3)'") + node_publisher.safe_psql( + "UPDATE temporal_no_key FOR PORTION OF valid_at FROM '2001-01-01' TO '2002-01-01' SET a = 'c' WHERE id = '[2,3)'" + ) + node_publisher.safe_psql("DELETE FROM temporal_no_key WHERE id = '[3,4)'") + node_publisher.safe_psql( + "DELETE FROM temporal_no_key FOR PORTION OF valid_at FROM '2002-01-01' TO '2003-01-01' WHERE id = '[2,3)'" + ) + node_publisher.wait_for_catchup("sub1") + result = node_subscriber.safe_psql( + "SELECT * FROM temporal_no_key ORDER BY id, valid_at" + ) + assert ( + result + == "[1,2)|[2000-01-01,2010-01-01)|a\n[2,3)|[2000-01-01,2001-01-01)|b\n[2,3)|[2001-01-01,2002-01-01)|c\n[2,3)|[2003-01-01,2010-01-01)|b\n[4,5)|[2000-01-01,2010-01-01)|a" + ), "replicated temporal_no_key FULL" + node_publisher.safe_psql( + "INSERT INTO 
temporal_pk (id, valid_at, a)\n VALUES ('[2,3)', '[2000-01-01,2010-01-01)', 'a'),\n ('[3,4)', '[2000-01-01,2010-01-01)', 'a'),\n ('[4,5)', '[2000-01-01,2010-01-01)', 'a')" + ) + node_publisher.safe_psql("UPDATE temporal_pk SET a = 'b' WHERE id = '[2,3)'") + node_publisher.safe_psql( + "UPDATE temporal_pk FOR PORTION OF valid_at FROM '2001-01-01' TO '2002-01-01' SET a = 'c' WHERE id = '[2,3)'" + ) + node_publisher.safe_psql("DELETE FROM temporal_pk WHERE id = '[3,4)'") + node_publisher.safe_psql( + "DELETE FROM temporal_pk FOR PORTION OF valid_at FROM '2002-01-01' TO '2003-01-01' WHERE id = '[2,3)'" + ) + node_publisher.wait_for_catchup("sub1") + result = node_subscriber.safe_psql( + "SELECT * FROM temporal_pk ORDER BY id, valid_at" + ) + assert ( + result + == "[1,2)|[2000-01-01,2010-01-01)|a\n[2,3)|[2000-01-01,2001-01-01)|b\n[2,3)|[2001-01-01,2002-01-01)|c\n[2,3)|[2003-01-01,2010-01-01)|b\n[4,5)|[2000-01-01,2010-01-01)|a" + ), "replicated temporal_pk FULL" + node_publisher.safe_psql( + "INSERT INTO temporal_unique (id, valid_at, a)\n VALUES ('[2,3)', '[2000-01-01,2010-01-01)', 'a'),\n ('[3,4)', '[2000-01-01,2010-01-01)', 'a'),\n ('[4,5)', '[2000-01-01,2010-01-01)', 'a')" + ) + node_publisher.safe_psql("UPDATE temporal_unique SET a = 'b' WHERE id = '[2,3)'") + node_publisher.safe_psql( + "UPDATE temporal_unique FOR PORTION OF valid_at FROM '2001-01-01' TO '2002-01-01' SET a = 'c' WHERE id = '[2,3)'" + ) + node_publisher.safe_psql("DELETE FROM temporal_unique WHERE id = '[3,4)'") + node_publisher.safe_psql( + "DELETE FROM temporal_unique FOR PORTION OF valid_at FROM '2002-01-01' TO '2003-01-01' WHERE id = '[2,3)'" + ) + node_publisher.wait_for_catchup("sub1") + result = node_subscriber.safe_psql( + "SELECT * FROM temporal_unique ORDER BY id, valid_at" + ) + assert ( + result + == "[1,2)|[2000-01-01,2010-01-01)|a\n[2,3)|[2000-01-01,2001-01-01)|b\n[2,3)|[2001-01-01,2002-01-01)|c\n[2,3)|[2003-01-01,2010-01-01)|b\n[4,5)|[2000-01-01,2010-01-01)|a" + ), "replicated 
temporal_unique FULL" + drop_everything() + node_publisher.safe_psql( + "CREATE TABLE temporal_pk (id int4range, valid_at daterange, a text, PRIMARY KEY (id, valid_at WITHOUT OVERLAPS))" + ) + node_publisher.safe_psql( + "ALTER TABLE temporal_pk REPLICA IDENTITY USING INDEX temporal_pk_pkey" + ) + node_publisher.safe_psql( + "CREATE TABLE temporal_unique (id int4range NOT NULL, valid_at daterange NOT NULL, a text, UNIQUE (id, valid_at WITHOUT OVERLAPS))" + ) + node_publisher.safe_psql( + "ALTER TABLE temporal_unique REPLICA IDENTITY USING INDEX temporal_unique_id_valid_at_key" + ) + node_subscriber.safe_psql( + "CREATE TABLE temporal_pk (id int4range, valid_at daterange, a text, PRIMARY KEY (id, valid_at WITHOUT OVERLAPS))" + ) + node_subscriber.safe_psql( + "ALTER TABLE temporal_pk REPLICA IDENTITY USING INDEX temporal_pk_pkey" + ) + node_subscriber.safe_psql( + "CREATE TABLE temporal_unique (id int4range NOT NULL, valid_at daterange NOT NULL, a text, UNIQUE (id, valid_at WITHOUT OVERLAPS))" + ) + node_subscriber.safe_psql( + "ALTER TABLE temporal_unique REPLICA IDENTITY USING INDEX temporal_unique_id_valid_at_key" + ) + node_publisher.safe_psql( + "INSERT INTO temporal_pk (id, valid_at, a)\n VALUES ('[1,2)', '[2000-01-01,2010-01-01)', 'a')" + ) + node_publisher.safe_psql( + "INSERT INTO temporal_unique (id, valid_at, a)\n VALUES ('[1,2)', '[2000-01-01,2010-01-01)', 'a')" + ) + node_publisher.safe_psql("CREATE PUBLICATION pub1 FOR ALL TABLES") + node_subscriber.safe_psql( + "CREATE SUBSCRIPTION sub1 CONNECTION '" + + publisher_connstr + + "' PUBLICATION pub1" + ) + node_subscriber.wait_for_subscription_sync() + result = node_subscriber.safe_psql( + "SELECT * FROM temporal_pk ORDER BY id, valid_at" + ) + assert result == "[1,2)|[2000-01-01,2010-01-01)|a", "synced temporal_pk USING INDEX" + result = node_subscriber.safe_psql( + "SELECT * FROM temporal_unique ORDER BY id, valid_at" + ) + assert ( + result == "[1,2)|[2000-01-01,2010-01-01)|a" + ), "synced 
temporal_unique USING INDEX" + node_publisher.safe_psql( + "INSERT INTO temporal_pk (id, valid_at, a)\n VALUES ('[2,3)', '[2000-01-01,2010-01-01)', 'a'),\n ('[3,4)', '[2000-01-01,2010-01-01)', 'a'),\n ('[4,5)', '[2000-01-01,2010-01-01)', 'a')" + ) + node_publisher.safe_psql("UPDATE temporal_pk SET a = 'b' WHERE id = '[2,3)'") + node_publisher.safe_psql( + "UPDATE temporal_pk FOR PORTION OF valid_at FROM '2001-01-01' TO '2002-01-01' SET a = 'c' WHERE id = '[2,3)'" + ) + node_publisher.safe_psql("DELETE FROM temporal_pk WHERE id = '[3,4)'") + node_publisher.safe_psql( + "DELETE FROM temporal_pk FOR PORTION OF valid_at FROM '2002-01-01' TO '2003-01-01' WHERE id = '[2,3)'" + ) + node_publisher.wait_for_catchup("sub1") + result = node_subscriber.safe_psql( + "SELECT * FROM temporal_pk ORDER BY id, valid_at" + ) + assert ( + result + == "[1,2)|[2000-01-01,2010-01-01)|a\n[2,3)|[2000-01-01,2001-01-01)|b\n[2,3)|[2001-01-01,2002-01-01)|c\n[2,3)|[2003-01-01,2010-01-01)|b\n[4,5)|[2000-01-01,2010-01-01)|a" + ), "replicated temporal_pk USING INDEX" + node_publisher.safe_psql( + "INSERT INTO temporal_unique (id, valid_at, a)\n VALUES ('[2,3)', '[2000-01-01,2010-01-01)', 'a'),\n ('[3,4)', '[2000-01-01,2010-01-01)', 'a'),\n ('[4,5)', '[2000-01-01,2010-01-01)', 'a')" + ) + node_publisher.safe_psql("UPDATE temporal_unique SET a = 'b' WHERE id = '[2,3)'") + node_publisher.safe_psql( + "UPDATE temporal_unique FOR PORTION OF valid_at FROM '2001-01-01' TO '2002-01-01' SET a = 'c' WHERE id = '[2,3)'" + ) + node_publisher.safe_psql("DELETE FROM temporal_unique WHERE id = '[3,4)'") + node_publisher.safe_psql( + "DELETE FROM temporal_unique FOR PORTION OF valid_at FROM '2002-01-01' TO '2003-01-01' WHERE id = '[2,3)'" + ) + node_publisher.wait_for_catchup("sub1") + result = node_subscriber.safe_psql( + "SELECT * FROM temporal_unique ORDER BY id, valid_at" + ) + assert ( + result + == 
"[1,2)|[2000-01-01,2010-01-01)|a\n[2,3)|[2000-01-01,2001-01-01)|b\n[2,3)|[2001-01-01,2002-01-01)|c\n[2,3)|[2003-01-01,2010-01-01)|b\n[4,5)|[2000-01-01,2010-01-01)|a" + ), "replicated temporal_unique USING INDEX" + drop_everything() + create_tables() + node_publisher.safe_psql("ALTER TABLE temporal_no_key REPLICA IDENTITY NOTHING") + node_publisher.safe_psql("ALTER TABLE temporal_pk REPLICA IDENTITY NOTHING") + node_publisher.safe_psql("ALTER TABLE temporal_unique REPLICA IDENTITY NOTHING") + node_subscriber.safe_psql("ALTER TABLE temporal_no_key REPLICA IDENTITY NOTHING") + node_subscriber.safe_psql("ALTER TABLE temporal_pk REPLICA IDENTITY NOTHING") + node_subscriber.safe_psql("ALTER TABLE temporal_unique REPLICA IDENTITY NOTHING") + node_publisher.safe_psql( + "INSERT INTO temporal_no_key (id, valid_at, a)\n VALUES ('[1,2)', '[2000-01-01,2010-01-01)', 'a')" + ) + node_publisher.safe_psql( + "INSERT INTO temporal_pk (id, valid_at, a)\n VALUES ('[1,2)', '[2000-01-01,2010-01-01)', 'a')" + ) + node_publisher.safe_psql( + "INSERT INTO temporal_unique (id, valid_at, a)\n VALUES ('[1,2)', '[2000-01-01,2010-01-01)', 'a')" + ) + node_publisher.safe_psql("CREATE PUBLICATION pub1 FOR ALL TABLES") + node_subscriber.safe_psql( + "CREATE SUBSCRIPTION sub1 CONNECTION '" + + publisher_connstr + + "' PUBLICATION pub1" + ) + node_subscriber.wait_for_subscription_sync() + result = node_subscriber.safe_psql( + "SELECT * FROM temporal_no_key ORDER BY id, valid_at" + ) + assert result == "[1,2)|[2000-01-01,2010-01-01)|a", "synced temporal_no_key NOTHING" + result = node_subscriber.safe_psql( + "SELECT * FROM temporal_pk ORDER BY id, valid_at" + ) + assert result == "[1,2)|[2000-01-01,2010-01-01)|a", "synced temporal_pk NOTHING" + result = node_subscriber.safe_psql( + "SELECT * FROM temporal_unique ORDER BY id, valid_at" + ) + assert result == "[1,2)|[2000-01-01,2010-01-01)|a", "synced temporal_unique NOTHING" + node_publisher.safe_psql( + "INSERT INTO temporal_no_key (id, valid_at, 
a)\n VALUES ('[2,3)', '[2000-01-01,2010-01-01)', 'a'),\n ('[3,4)', '[2000-01-01,2010-01-01)', 'a'),\n ('[4,5)', '[2000-01-01,2010-01-01)', 'a')" + ) + result = node_publisher.psql_capture( + "UPDATE temporal_no_key SET a = 'b' WHERE id = '[2,3)'" + ) + assert ( + result.stderr + == 'psql::1: ERROR: cannot update table "temporal_no_key" because it does not have a replica identity and publishes updates\nHINT: To enable updating the table, set REPLICA IDENTITY using ALTER TABLE.' + ), "can't UPDATE temporal_no_key NOTHING" + result = node_publisher.psql_capture( + "DELETE FROM temporal_no_key WHERE id = '[3,4)'" + ) + assert ( + result.stderr + == 'psql::1: ERROR: cannot delete from table "temporal_no_key" because it does not have a replica identity and publishes deletes\nHINT: To enable deleting from the table, set REPLICA IDENTITY using ALTER TABLE.' + ), "can't DELETE temporal_no_key NOTHING" + result = node_publisher.psql_capture( + "DELETE FROM temporal_no_key FOR PORTION OF valid_at FROM '2002-01-01' TO '2003-01-01' WHERE id = '[2,3)'" + ) + assert ( + result.stderr + == 'psql::1: ERROR: cannot delete from table "temporal_no_key" because it does not have a replica identity and publishes deletes\nHINT: To enable deleting from the table, set REPLICA IDENTITY using ALTER TABLE.' 
+ ), "can't DELETE temporal_no_key NOTHING" + node_publisher.wait_for_catchup("sub1") + result = node_subscriber.safe_psql( + "SELECT * FROM temporal_no_key ORDER BY id, valid_at" + ) + assert ( + result + == "[1,2)|[2000-01-01,2010-01-01)|a\n[2,3)|[2000-01-01,2010-01-01)|a\n[3,4)|[2000-01-01,2010-01-01)|a\n[4,5)|[2000-01-01,2010-01-01)|a" + ), "replicated temporal_no_key NOTHING" + node_publisher.safe_psql( + "INSERT INTO temporal_pk (id, valid_at, a)\n VALUES ('[2,3)', '[2000-01-01,2010-01-01)', 'a'),\n ('[3,4)', '[2000-01-01,2010-01-01)', 'a'),\n ('[4,5)', '[2000-01-01,2010-01-01)', 'a')" + ) + result = node_publisher.psql_capture( + "UPDATE temporal_pk SET a = 'b' WHERE id = '[2,3)'" + ) + assert ( + result.stderr + == 'psql::1: ERROR: cannot update table "temporal_pk" because it does not have a replica identity and publishes updates\nHINT: To enable updating the table, set REPLICA IDENTITY using ALTER TABLE.' + ), "can't UPDATE temporal_pk NOTHING" + result = node_publisher.psql_capture("DELETE FROM temporal_pk WHERE id = '[3,4)'") + assert ( + result.stderr + == 'psql::1: ERROR: cannot delete from table "temporal_pk" because it does not have a replica identity and publishes deletes\nHINT: To enable deleting from the table, set REPLICA IDENTITY using ALTER TABLE.' + ), "can't DELETE temporal_pk NOTHING" + result = node_publisher.psql_capture( + "DELETE FROM temporal_pk FOR PORTION OF valid_at FROM '2002-01-01' TO '2003-01-01' WHERE id = '[2,3)'" + ) + assert ( + result.stderr + == 'psql::1: ERROR: cannot delete from table "temporal_pk" because it does not have a replica identity and publishes deletes\nHINT: To enable deleting from the table, set REPLICA IDENTITY using ALTER TABLE.' 
+ ), "can't DELETE temporal_pk NOTHING" + node_publisher.wait_for_catchup("sub1") + result = node_subscriber.safe_psql( + "SELECT * FROM temporal_pk ORDER BY id, valid_at" + ) + assert ( + result + == "[1,2)|[2000-01-01,2010-01-01)|a\n[2,3)|[2000-01-01,2010-01-01)|a\n[3,4)|[2000-01-01,2010-01-01)|a\n[4,5)|[2000-01-01,2010-01-01)|a" + ), "replicated temporal_pk NOTHING" + node_publisher.safe_psql( + "INSERT INTO temporal_unique (id, valid_at, a)\n VALUES ('[2,3)', '[2000-01-01,2010-01-01)', 'a'),\n ('[3,4)', '[2000-01-01,2010-01-01)', 'a'),\n ('[4,5)', '[2000-01-01,2010-01-01)', 'a')" + ) + result = node_publisher.psql_capture( + "UPDATE temporal_unique SET a = 'b' WHERE id = '[2,3)'" + ) + assert ( + result.stderr + == 'psql::1: ERROR: cannot update table "temporal_unique" because it does not have a replica identity and publishes updates\nHINT: To enable updating the table, set REPLICA IDENTITY using ALTER TABLE.' + ), "can't UPDATE temporal_unique NOTHING" + result = node_publisher.psql_capture( + "DELETE FROM temporal_unique WHERE id = '[3,4)'" + ) + assert ( + result.stderr + == 'psql::1: ERROR: cannot delete from table "temporal_unique" because it does not have a replica identity and publishes deletes\nHINT: To enable deleting from the table, set REPLICA IDENTITY using ALTER TABLE.' + ), "can't DELETE temporal_unique NOTHING" + result = node_publisher.psql_capture( + "DELETE FROM temporal_unique FOR PORTION OF valid_at FROM '2002-01-01' TO '2003-01-01' WHERE id = '[2,3)'" + ) + assert ( + result.stderr + == 'psql::1: ERROR: cannot delete from table "temporal_unique" because it does not have a replica identity and publishes deletes\nHINT: To enable deleting from the table, set REPLICA IDENTITY using ALTER TABLE.' 
+ ), "can't DELETE FOR PORTION OF temporal_unique NOTHING" + node_publisher.wait_for_catchup("sub1") + result = node_subscriber.safe_psql( + "SELECT * FROM temporal_unique ORDER BY id, valid_at" + ) + assert ( + result + == "[1,2)|[2000-01-01,2010-01-01)|a\n[2,3)|[2000-01-01,2010-01-01)|a\n[3,4)|[2000-01-01,2010-01-01)|a\n[4,5)|[2000-01-01,2010-01-01)|a" + ), "replicated temporal_unique NOTHING" + drop_everything() diff --git a/src/test/subscription/pyt/test_035_conflicts.py b/src/test/subscription/pyt/test_035_conflicts.py new file mode 100644 index 0000000000000..5f6b56220f62c --- /dev/null +++ b/src/test/subscription/pyt/test_035_conflicts.py @@ -0,0 +1,454 @@ +# Copyright (c) 2025-2026, PostgreSQL Global Development Group + +"""Port of src/test/subscription/t/035_conflicts.pl. + +Conflict detection in logical replication: multiple_unique_conflicts on +INSERT/UPDATE (including a leaf partition), and a bidirectional setup that +exercises delete_origin_differs / update_deleted conflicts, the +pg_conflict_detection slot lifecycle, retain_dead_tuples DDL rules, +max_retention_duration stop/resume, and (with injection_points) retention of a +deleted tuple across a DELAY_CHKPT_IN_COMMIT prepared transaction. +""" + +import re + +import pypg + +_MUC_INSERT = ( + r"conflict detected on relation \"public.conf_tab\": " + r"conflict=multiple_unique_conflicts.*\n" + r".*Could not apply remote change: remote row \(2, 3, 4\).*\n" + r".*Key already exists in unique index \"conf_tab_pkey\", modified in " + r"transaction .*: key \(a\)=\(2\), local row \(2, 2, 2\).*\n" + r".*Key already exists in unique index \"conf_tab_b_key\", modified in " + r"transaction .*: key \(b\)=\(3\), local row \(3, 3, 3\).*\n" + r".*Key already exists in unique index \"conf_tab_c_key\", modified in " + r"transaction .*: key \(c\)=\(4\), local row \(4, 4, 4\)." 
+) +_MUC_UPDATE = ( + r"conflict detected on relation \"public.conf_tab\": " + r"conflict=multiple_unique_conflicts.*\n" + r".*Could not apply remote change: remote row \(6, 7, 8\), " + r"replica identity \(a\)=\(5\).*\n" + r".*Key already exists in unique index \"conf_tab_pkey\", modified in " + r"transaction .*: key \(a\)=\(6\), local row \(6, 6, 6\).*\n" + r".*Key already exists in unique index \"conf_tab_b_key\", modified in " + r"transaction .*: key \(b\)=\(7\), local row \(7, 7, 7\).*\n" + r".*Key already exists in unique index \"conf_tab_c_key\", modified in " + r"transaction .*: key \(c\)=\(8\), local row \(8, 8, 8\)." +) +_MUC_PARTITION = ( + r"conflict detected on relation \"public.conf_tab_2_p1\": " + r"conflict=multiple_unique_conflicts.*\n" + r".*Could not apply remote change: remote row \(55, 2, 3\).*\n" + r".*Key already exists in unique index \"conf_tab_2_p1_pkey\", modified in " + r"transaction .*: key \(a\)=\(55\), local row \(55, 2, 3\).*\n" + r".*Key already exists in unique index \"conf_tab_2_p1_a_b_key\", modified " + r"in transaction .*: key \(a, b\)=\(55, 2\), local row \(55, 2, 3\)." +) +_DELETE_ORIGIN_DIFFERS = ( + r'conflict detected on relation "public.tab": conflict=delete_origin_differs.*\n' + r".*DETAIL:.* Deleting the row that was modified locally in transaction " + r"[0-9]+ at .*: local row \(1, 3\), replica identity \(a\)=\(1\)." 
+) +_UPDATE_DELETED = ( + r'conflict detected on relation "public.tab": conflict=update_deleted.*\n' + r".*DETAIL:.* Could not find the row to be updated: remote row \(1, 3\), " + r"replica identity \(a\)=\(1\).\n" + r".*The row to be updated was deleted locally in transaction [0-9]+ at .*" +) +_UPDATE_DELETED_FULL = ( + r'conflict detected on relation "public.tab": conflict=update_deleted.*\n' + r".*DETAIL:.* Could not find the row to be updated: remote row \(2, 4\), " + r"replica identity full \(2, 2\).*\n" + r".*The row to be updated was deleted locally in transaction [0-9]+ at .*" +) +_UPDATE_DELETED_INJ = ( + r'conflict detected on relation "public.tab": conflict=update_deleted.*\n' + r".*DETAIL:.* Could not find the row to be updated: remote row \(1, 2\), " + r"replica identity full \(1, 1\).*\n" + r".*The row to be updated was deleted locally in transaction [0-9]+ at .*" +) +_RETENTION_RESUME = ( + r'logical replication worker for subscription "tap_sub_a_b" will resume ' + r"retaining the information for detecting conflicts\n" + r".*DETAIL:.* Retention is re-enabled because max_retention_duration has " + r"been set to unlimited.*" +) + +_APPLY_WORKER_STOPPED = ( + "SELECT count(*) = 0 FROM pg_stat_activity " + "WHERE backend_type = 'logical replication apply worker'" +) +_SLOT = "pg_replication_slots WHERE slot_name = 'pg_conflict_detection'" + + +def _setup_unidirectional(create_pg): + """Create pub/sub nodes, tables, publication and subscription.""" + publisher = create_pg("publisher", allows_streaming="logical") + subscriber = create_pg("subscriber", allows_streaming="logical") + + publisher.safe_psql( + "CREATE TABLE conf_tab (a int PRIMARY KEY, b int UNIQUE, c int UNIQUE);" + ) + publisher.safe_psql( + "CREATE TABLE conf_tab_2 (a int PRIMARY KEY, b int UNIQUE, c int UNIQUE);" + ) + subscriber.safe_psql( + "CREATE TABLE conf_tab (a int PRIMARY key, b int UNIQUE, c int UNIQUE);" + ) + subscriber.safe_psql( + "CREATE TABLE conf_tab_2 (a int PRIMARY KEY, b 
def _test_multiple_unique_conflicts(publisher, subscriber):
    """Check multiple_unique_conflicts on INSERT, UPDATE and a leaf partition.

    Each scenario places rows on the subscriber that collide with an incoming
    remote change, then waits for the matching conflict report to appear in
    the subscriber's server log.

    publisher -- node that originates the conflicting changes.
    subscriber -- node whose log is scanned for the conflict reports.
    """
    publisher.safe_psql("INSERT INTO conf_tab VALUES (1,1,1);")
    subscriber.safe_psql("INSERT INTO conf_tab VALUES (2,2,2), (3,3,3), (4,4,4);")

    # INSERT: remote row (2,3,4) collides with three different local rows.
    offset = subscriber.current_log_position()
    publisher.safe_psql("INSERT INTO conf_tab VALUES (2,3,4);")
    subscriber.wait_for_log(_MUC_INSERT, offset)
    # Remove the colliding local rows before the next scenario.
    subscriber.safe_psql("TRUNCATE conf_tab;")

    # UPDATE: moving remote row 5 onto (6,7,8) collides with three local rows.
    offset = subscriber.current_log_position()
    publisher.safe_psql("INSERT INTO conf_tab VALUES (5,5,5);")
    subscriber.safe_psql("INSERT INTO conf_tab VALUES (6,6,6), (7,7,7), (8,8,8);")
    publisher.safe_psql("UPDATE conf_tab set a=6, b=7, c=8 where a=5;")
    subscriber.wait_for_log(_MUC_UPDATE, offset)
    subscriber.safe_psql("TRUNCATE conf_tab;")

    # Leaf partition: take a fresh log position so this wait only scans output
    # produced by this scenario, exactly like the two scenarios above.
    offset = subscriber.current_log_position()
    subscriber.safe_psql("INSERT INTO conf_tab_2 VALUES (55,2,3);")
    publisher.safe_psql("INSERT INTO conf_tab_2 VALUES (55,2,3);")
    subscriber.wait_for_log(_MUC_PARTITION, offset)
def _test_retain_dead_tuples_ddl(node_a, subname_ab):
    """Check the retain_dead_tuples DDL rules and the origin=any warning.

    Steps, all executed on node_a against subscription ``subname_ab``:
      1. setting retain_dead_tuples on an enabled subscription is rejected;
      2. after disabling the subscription the same option is accepted with a
         NOTICE;
      3. re-enabling gives the pg_conflict_detection slot a non-NULL xmin;
      4. switching to origin = any emits a WARNING; origin is then set back
         to none.

    node_a -- node that owns the subscription.
    subname_ab -- name of the subscription to alter.
    """
    result = node_a.psql_capture(
        "ALTER SUBSCRIPTION {} SET (retain_dead_tuples = true)".format(subname_ab),
        on_error_stop=False,
    )
    assert re.search(
        r'ERROR: cannot set option "retain_dead_tuples" for enabled subscription',
        result.stderr,
    ), "altering retain_dead_tuples is not allowed for enabled subscription"

    node_a.psql_capture(
        "ALTER SUBSCRIPTION {} DISABLE;".format(subname_ab), on_error_stop=False
    )
    # Previously the poll result was discarded, so a timeout went unnoticed and
    # the next step could race with a still-running apply worker.
    assert node_a.poll_query_until(
        _APPLY_WORKER_STOPPED
    ), "the apply worker has stopped on Node A"

    result = node_a.psql_capture(
        "ALTER SUBSCRIPTION {} SET (retain_dead_tuples = true);".format(subname_ab),
        on_error_stop=False,
    )
    assert re.search(
        r"NOTICE: deleted rows to detect conflicts would not be removed until "
        r"the subscription is enabled",
        result.stderr,
    ), "altering retain_dead_tuples is allowed for disabled subscription"

    node_a.safe_psql("ALTER SUBSCRIPTION {} ENABLE;".format(subname_ab))
    assert node_a.poll_query_until(
        "SELECT xmin IS NOT NULL from {}".format(_SLOT)
    ), "the xmin value of slot 'pg_conflict_detection' is valid on Node A"

    result = node_a.psql_capture(
        "ALTER SUBSCRIPTION {} SET (origin = any);".format(subname_ab),
        on_error_stop=False,
    )
    assert re.search(
        r'WARNING: subscription "tap_sub_a_b" enabled retain_dead_tuples but '
        r"might not reliably detect conflicts for changes from different origins",
        result.stderr,
    ), "warn of receiving changes from origins other than the publisher"

    # Restore origin = none for the tests that follow.
    node_a.psql_capture(
        "ALTER SUBSCRIPTION {} SET (origin = none);".format(subname_ab),
        on_error_stop=False,
    )
def _test_seqscan_deleted_tuple(node_a, node_b, subname_ab):
    """Check update_deleted detection when the row is located without an index.

    Both nodes switch ``tab`` to REPLICA IDENTITY FULL and node_a drops the
    primary key. While node_a's subscription is disabled, node_b updates row
    a = 2 and node_a deletes it; re-enabling the subscription must log an
    update_deleted conflict on node_a.

    node_a -- node that deletes the row and applies the remote UPDATE.
    node_b -- node that issues the UPDATE.
    subname_ab -- node_a's subscription.
    """
    node_a.safe_psql("ALTER TABLE tab REPLICA IDENTITY FULL")
    node_b.safe_psql("ALTER TABLE tab REPLICA IDENTITY FULL")
    node_a.safe_psql("ALTER TABLE tab DROP CONSTRAINT tab_pkey;")

    node_a.safe_psql("ALTER SUBSCRIPTION {} DISABLE".format(subname_ab))
    # Previously the poll result was discarded; a timeout here must fail the
    # test instead of letting the UPDATE/DELETE race with a live apply worker.
    assert node_a.poll_query_until(
        _APPLY_WORKER_STOPPED
    ), "the apply worker has stopped on Node A"

    node_b.safe_psql("UPDATE tab SET b = 4 WHERE a = 2;")
    node_a.safe_psql("DELETE FROM tab WHERE a = 2;")

    # Only log output written after re-enabling the subscription is inspected.
    log_location = node_a.current_log_position()
    node_a.safe_psql("ALTER SUBSCRIPTION {} ENABLE;".format(subname_ab))
    node_b.wait_for_catchup(subname_ab)
    logfile = pypg.slurp_file(node_a.log, log_location)
    assert re.search(
        _UPDATE_DELETED_FULL, logfile
    ), "update target row was deleted in tab"


def _test_xmin_advance_no_tables(node_a, node_b, subname_ab):
    """Check that the slot xmin advances when the subscription has no tables.

    ``tab`` is dropped from node_b's publication and node_a refreshes its
    subscription; the pg_conflict_detection slot's xmin on node_a must then
    reach the next transaction id. Afterwards the table is added back and the
    subscription refreshed with copy_data = false.

    node_a -- node that owns the subscription.
    node_b -- node that owns publication tap_pub_B.
    subname_ab -- node_a's subscription.
    """
    node_b.safe_psql("ALTER PUBLICATION tap_pub_B DROP TABLE tab")
    node_a.safe_psql("ALTER SUBSCRIPTION {} REFRESH PUBLICATION".format(subname_ab))

    next_xid = node_a.safe_psql("SELECT txid_current() + 1;")
    assert node_a.poll_query_until(
        "SELECT xmin = {} from {}".format(next_xid, _SLOT)
    ), "the xmin value of slot 'pg_conflict_detection' is updated on Node A"

    # Put the table back for the tests that follow.
    node_b.safe_psql("ALTER PUBLICATION tap_pub_B ADD TABLE tab")
    node_a.safe_psql(
        "ALTER SUBSCRIPTION {} REFRESH PUBLICATION WITH (copy_data = false)".format(
            subname_ab
        )
    )
"""DELAY_CHKPT_IN_COMMIT prepared txn retains a concurrently-deleted tuple.""" + node_b.append_conf( + "shared_preload_libraries = 'injection_points'\nmax_prepared_transactions = 1" + ) + node_b.restart() + node_b.psql_capture( + "ALTER SUBSCRIPTION {} DISABLE;".format(subname_ba), on_error_stop=False + ) + node_b.poll_query_until(_APPLY_WORKER_STOPPED) + node_b.safe_psql("TRUNCATE tab;\nINSERT INTO tab VALUES(1, 1);") + node_b.wait_for_catchup(subname_ab) + node_b.safe_psql( + "CREATE EXTENSION injection_points;\n" + "SELECT injection_points_attach('commit-after-delay-checkpoint', 'wait');" + ) + pub_session = node_b.background_psql("postgres") + pub_session.query_until( + r"starting_bg_psql", + "\\echo starting_bg_psql\n" + "BEGIN;\n" + "UPDATE tab SET b = 2 WHERE a = 1;\n" + "PREPARE TRANSACTION 'txn_with_later_commit_ts';\n" + "COMMIT PREPARED 'txn_with_later_commit_ts';\n", + ) + node_b.wait_for_event("client backend", "commit-after-delay-checkpoint") + assert ( + node_b.safe_psql("SELECT * FROM tab WHERE a = 1") == "1|1" + ), "publisher sees the old row" + + node_a.safe_psql("DELETE FROM tab WHERE a = 1;") + sub_ts = node_a.safe_psql("SELECT timestamp FROM pg_last_committed_xact();") + + log_location = node_a.current_log_position() + node_a.wait_for_log(r"sending publisher status request message", log_location) + log_location = node_a.current_log_position() + node_a.wait_for_log(r"sending publisher status request message", log_location) + + result = node_a.psql_capture("VACUUM (verbose) public.tab;", on_error_stop=False) + assert re.search( + r"1 are dead but not yet removable", result.stderr + ), "the deleted column is non-removable" + + log_location = node_a.current_log_position() + node_b.safe_psql( + "SELECT injection_points_wakeup('commit-after-delay-checkpoint');\n" + "SELECT injection_points_detach('commit-after-delay-checkpoint');" + ) + assert pub_session.quit() == 0, "close publisher session" + assert ( + node_b.safe_psql("SELECT * FROM tab WHERE a 
def _test_max_retention_duration(node_a, node_b, subname_ab):
    """Check that retention stops past max_retention_duration and resumes at 0.

    node_a -- node whose subscription and pg_conflict_detection slot are checked.
    node_b -- publishing node; receives the 'blocker' physical slot.
    subname_ab -- node_a's subscription.
    """
    alter_sub = "ALTER SUBSCRIPTION {} {};"
    retention_active_sql = (
        "SELECT subretentionactive FROM pg_subscription "
        "WHERE subname='{}';".format(subname_ab)
    )

    # NOTE(review): the unused 'blocker' slot listed in
    # synchronized_standby_slots presumably holds back node_b's logical
    # walsender for the failover-enabled subscription -- confirm.
    node_b.safe_psql("SELECT * FROM pg_create_physical_replication_slot('blocker');")
    node_b.append_conf("synchronized_standby_slots = 'blocker'")
    node_b.reload()
    for action in ("DISABLE", "SET (failover = true)", "ENABLE"):
        node_a.safe_psql(alter_sub.format(subname_ab, action))
    node_b.safe_psql("INSERT INTO tab VALUES (5, 5);")
    node_a.safe_psql("SELECT txid_current() + 1;")

    # Phase 1: a 1ms limit makes the worker stop retaining information.
    stop_offset = node_a.current_log_position()
    node_a.safe_psql(alter_sub.format(subname_ab, "SET (max_retention_duration = 1)"))
    stopped_pattern = (
        r'logical replication worker for subscription "tap_sub_a_b" has stopped '
        r"retaining the information for detecting conflicts"
    )
    node_a.wait_for_log(stopped_pattern, stop_offset)
    xmin_cleared = node_a.poll_query_until("SELECT xmin IS NULL from {}".format(_SLOT))
    assert (
        xmin_cleared
    ), "the xmin value of slot 'pg_conflict_detection' is invalid on Node A"
    retention_state = node_a.safe_psql(retention_active_sql)
    assert retention_state == "f", "retention is inactive"

    # Phase 2: lifting the limit and removing the blocker resumes retention.
    resume_offset = node_a.current_log_position()
    node_a.safe_psql(alter_sub.format(subname_ab, "SET (max_retention_duration = 0)"))
    node_b.safe_psql("SELECT * FROM pg_drop_replication_slot('blocker');")
    node_b.adjust_conf("synchronized_standby_slots", "''")
    node_b.reload()
    node_a.wait_for_log(_RETENTION_RESUME, resume_offset)
    xmin_restored = node_a.poll_query_until(
        "SELECT xmin IS NOT NULL from {}".format(_SLOT)
    )
    assert (
        xmin_restored
    ), "the xmin value of slot 'pg_conflict_detection' is valid on Node A"
    retention_state = node_a.safe_psql(retention_active_sql)
    assert retention_state == "t", "retention is active"


def _test_slot_dropped(node_a, node_b, subname_ab, subname_ba):
    """Check that dropping each node's subscription removes its conflict slot.

    Node B's subscription is dropped first, then Node A's; after each drop the
    pg_conflict_detection slot must disappear from that node.
    """
    slot_gone_sql = "SELECT count(*) = 0 FROM {}".format(_SLOT)
    drop_order = (
        (node_b, subname_ba, "Node B"),
        (node_a, subname_ab, "Node A"),
    )
    for node, subname, label in drop_order:
        node.safe_psql("DROP SUBSCRIPTION {}".format(subname))
        slot_gone = node.poll_query_until(slot_gone_sql)
        assert slot_gone, (
            "the slot 'pg_conflict_detection' has been dropped on " + label
        )
# True once every relation of every subscription is in the 'r' (ready) state.
_SYNCED = "SELECT count(1) = 0 FROM pg_subscription_rel WHERE srsubstate NOT IN ('r');"
# Double-quoted identifier of the sequence whose name contains a single quote.
_QUOTE = '"regress\'quote"'


def _seq(node, name):
    """Return ``last_value|is_called`` of sequence *name* as reported by *node*."""
    query = "SELECT last_value, is_called FROM {}".format(name)
    return node.safe_psql(query)


def test_sequences(create_pg):
    """Initial sync, REFRESH PUBLICATION/SEQUENCES, and mismatch warnings."""
    publisher = create_pg("publisher", allows_streaming="logical")
    subscriber = create_pg("subscriber")

    def advance(seq_literal):
        # SQL statement that consumes 100 values from the given sequence.
        return (
            "INSERT INTO regress_seq_test SELECT nextval('{}') "
            "FROM generate_series(1,100);".format(seq_literal)
        )

    def refresh(clause):
        # Run ALTER SUBSCRIPTION ... REFRESH <clause> and wait for sync.
        subscriber.safe_psql(
            "ALTER SUBSCRIPTION regress_seq_sub REFRESH {};".format(clause)
        )
        assert subscriber.poll_query_until(_SYNCED), "subscriber synchronized"

    def check(expectations):
        # Each entry: (node, sequence name, expected value, assertion message).
        for node, seq_name, expected, message in expectations:
            assert _seq(node, seq_name) == expected, message

    common_ddl = "CREATE TABLE regress_seq_test (v BIGINT);\nCREATE SEQUENCE regress_s1;\n"
    quoted_ddl = 'CREATE SEQUENCE "regress\'quote";'
    publisher.safe_psql(common_ddl + quoted_ddl)
    subscriber.safe_psql(
        common_ddl
        + "CREATE SEQUENCE regress_s2;\n"
        + "CREATE SEQUENCE regress_s3;\n"
        + quoted_ddl
    )

    publisher.safe_psql(
        "\n".join([advance("regress_s1"), advance('"regress\'\'quote"')])
    )

    connstr = publisher.connstr() + " dbname=postgres"
    publisher.safe_psql("CREATE PUBLICATION regress_seq_pub FOR ALL SEQUENCES")
    subscriber.safe_psql(
        "CREATE SUBSCRIPTION regress_seq_sub CONNECTION '{}' "
        "PUBLICATION regress_seq_pub".format(connstr)
    )
    assert subscriber.poll_query_until(_SYNCED), "subscriber synchronized"
    check(
        [
            (subscriber, "regress_s1", "100|t", "initial test data replicated"),
            (subscriber, _QUOTE, "100|t", "initial data for quoted sequence name"),
        ]
    )

    # REFRESH PUBLICATION syncs newly published sequences only.
    publisher.safe_psql(
        "\n".join(
            ["CREATE SEQUENCE regress_s2;", advance("regress_s2"), advance("regress_s1")]
        )
    )
    refresh("PUBLICATION")
    check(
        [
            (publisher, "regress_s1", "200|t", "sequence value in the publisher"),
            (subscriber, "regress_s1", "100|t", "REFRESH does not sync existing"),
            (subscriber, "regress_s2", "100|t", "REFRESH syncs newly published"),
        ]
    )

    # REFRESH SEQUENCES re-syncs existing sequences but not newly added ones.
    publisher.safe_psql(
        "\n".join(
            ["CREATE SEQUENCE regress_s3;", advance("regress_s3"), advance("regress_s2")]
        )
    )
    refresh("SEQUENCES")
    check(
        [
            (subscriber, "regress_s1", "200|t", "REFRESH SEQUENCES syncs existing"),
            (subscriber, "regress_s2", "200|t", "REFRESH SEQUENCES syncs existing"),
            (subscriber, "regress_s3", "1|f", "REFRESH SEQUENCES not new sequence"),
        ]
    )

    # REFRESH PUBLICATION (copy_data=false) does not sync the new sequence.
    refresh("PUBLICATION WITH (copy_data = false)")
    check([(subscriber, "regress_s3", "1|f", "copy_data=false does not sync new")])

    _test_warnings(publisher, subscriber, connstr)
# Query template: true when slot 'test_slot' holds no pgoutput changes for the
# publication whose name is substituted for '{}'.
_BINARY_CHANGES = (
    "SELECT count(*) = 0 FROM pg_logical_slot_get_binary_changes("
    "'test_slot', NULL, NULL, 'proto_version', '1', "
    "'publication_names', '{}')"
)


def _count(node, table):
    """Return the row count of *table* on *node*, as the string psql prints."""
    return node.safe_psql("SELECT count(*) FROM {}".format(table))


def _test_except_root_partition(publisher, subscriber, connstr, pubviaroot):
    """Check that a root partitioned table in EXCEPT excludes all its partitions.

    The exclusion must hold regardless of publish_via_partition_root.

    publisher, subscriber -- the two nodes; root1 and its partitions must
        already exist on both.
    connstr -- connection string the subscriber uses to reach the publisher.
    pubviaroot -- value substituted for publish_via_partition_root
        ("true" or "false").
    """
    publisher.safe_psql(
        "CREATE PUBLICATION tap_pub_part FOR ALL TABLES EXCEPT (TABLE root1) "
        "WITH (publish_via_partition_root = {});\n"
        "INSERT INTO root1 VALUES (1), (101);".format(pubviaroot)
    )
    subscriber.safe_psql(
        "CREATE SUBSCRIPTION tap_sub_part CONNECTION '{}' "
        "PUBLICATION tap_pub_part".format(connstr)
    )
    subscriber.wait_for_subscription_sync(publisher, "tap_sub_part")

    # Skip everything already in the slot so only the INSERT below is examined.
    publisher.safe_psql(
        "SELECT slot_name FROM pg_replication_slot_advance("
        "'test_slot', pg_current_wal_lsn())"
    )
    publisher.safe_psql("INSERT INTO root1 VALUES (2), (102)")
    # The result used to be discarded, which made this check a no-op; assert
    # it the same way test_except() does for its EXCEPT tables.
    assert (
        publisher.safe_psql(_BINARY_CHANGES.format("tap_pub_part")) == "t"
    ), "no changes for EXCEPT root partitioned table in the replication slot"
    publisher.wait_for_catchup("tap_sub_part")

    for table in ("root1", "part1", "part2", "part2_1"):
        assert _count(subscriber, table) == "0", "no rows replicated for " + table

    subscriber.safe_psql("DROP SUBSCRIPTION tap_sub_part")
    publisher.safe_psql("DROP PUBLICATION tap_pub_part")


def _test_multi_publication(publisher, subscriber, connstr, pub2_sql):
    """A table excluded by pub1's EXCEPT is replicated when pub2 includes it.

    publisher, subscriber -- the two nodes.
    connstr -- connection string the subscriber uses to reach the publisher.
    pub2_sql -- SQL run on the publisher to create tap_pub2.

    NOTE(review): assumes tab1 is empty on both nodes and that tap_pub1
    already exists when this is called -- confirm against the caller.
    """
    publisher.safe_psql(pub2_sql + "\nINSERT INTO tab1 VALUES(1);")
    subscriber.psql_capture(
        "CREATE SUBSCRIPTION tap_sub CONNECTION '{}' "
        "PUBLICATION tap_pub1, tap_pub2".format(connstr)
    )
    subscriber.wait_for_subscription_sync(publisher, "tap_sub")

    publisher.safe_psql("INSERT INTO tab1 VALUES(2)")
    publisher.wait_for_catchup("tap_sub")
    # Check the subscriber: the publisher always holds the rows it just
    # inserted, so querying it could not detect a replication failure.
    assert (
        subscriber.safe_psql("SELECT * FROM tab1 ORDER BY a") == "1\n2"
    ), "table in one publication's EXCEPT but included by another is replicated"
"CREATE TABLE child (b int) INHERITS (parent);\n" + "CREATE TABLE parent1 (a int);\n" + "CREATE TABLE child1 (b int) INHERITS (parent1);" + ) + subscriber.safe_psql( + "CREATE TABLE tab1 (a int);\n" + "CREATE TABLE parent (a int);\n" + "CREATE TABLE child (b int) INHERITS (parent);\n" + "CREATE TABLE parent1 (a int);\n" + "CREATE TABLE child1 (b int) INHERITS (parent1);" + ) + + publisher.safe_psql( + "CREATE PUBLICATION tap_pub FOR ALL TABLES EXCEPT " + "(TABLE tab1, parent, only parent1)" + ) + publisher.safe_psql( + "SELECT pg_create_logical_replication_slot('test_slot', 'pgoutput')" + ) + subscriber.safe_psql( + "CREATE SUBSCRIPTION tap_sub CONNECTION '{}' PUBLICATION tap_pub".format( + connstr + ) + ) + subscriber.wait_for_subscription_sync(publisher, "tap_sub") + assert _count(subscriber, "tab1") == "0", "no initial copy for EXCEPT tables" + + publisher.safe_psql( + "INSERT INTO tab1 VALUES(generate_series(11,20));\n" + "INSERT INTO child VALUES(generate_series(11,20), generate_series(11,20));" + ) + assert ( + publisher.safe_psql(_BINARY_CHANGES.format("tap_pub")) == "t" + ), "no changes for EXCEPT tables in the replication slot" + + # ONLY parent1 in EXCEPT excludes only the parent, not its child. 
+ publisher.safe_psql( + "INSERT INTO child1 VALUES(generate_series(11,20), generate_series(11,20))" + ) + publisher.wait_for_catchup("tap_sub") + assert _count(subscriber, "tab1") == "0", "tab1 excluded" + assert _count(subscriber, "child") == "0", "child excluded via parent" + assert _count(subscriber, "child1") == "10", "child1 replicated (ONLY parent1)" + + publisher.safe_psql("CREATE TABLE tab2 AS SELECT generate_series(1,10) AS a") + subscriber.safe_psql("CREATE TABLE tab2 (a int)") + publisher.safe_psql("ALTER PUBLICATION tap_pub SET ALL TABLES EXCEPT (TABLE tab2)") + subscriber.safe_psql("ALTER SUBSCRIPTION tap_sub REFRESH PUBLICATION") + subscriber.wait_for_subscription_sync(publisher, "tap_sub") + assert _count(subscriber, "tab2") == "0", "no initial copy for EXCEPT tab2" + assert _count(subscriber, "tab1") == "20", "tab1 copied once removed from EXCEPT" + + subscriber.safe_psql( + "DROP SUBSCRIPTION tap_sub;\nTRUNCATE TABLE tab1;\n" + "DROP TABLE parent, parent1, child, child1, tab2;" + ) + publisher.safe_psql( + "DROP PUBLICATION tap_pub;\nTRUNCATE TABLE tab1;\n" + "DROP TABLE parent, parent1, child, child1, tab2;" + ) + + # Partitioned tables (publisher) mapping to plain tables (subscriber). + publisher.safe_psql( + "CREATE TABLE root1(a int) PARTITION BY RANGE(a);\n" + "CREATE TABLE part1 PARTITION OF root1 FOR VALUES FROM (0) TO (100);\n" + "CREATE TABLE part2 PARTITION OF root1 FOR VALUES FROM (100) TO (200) " + "PARTITION BY RANGE(a);\n" + "CREATE TABLE part2_1 PARTITION OF part2 FOR VALUES FROM (100) TO (150);" + ) + subscriber.safe_psql( + "CREATE TABLE root1(a int);\nCREATE TABLE part1(a int);\n" + "CREATE TABLE part2(a int);\nCREATE TABLE part2_1(a int);" + ) + _test_except_root_partition(publisher, subscriber, connstr, "false") + _test_except_root_partition(publisher, subscriber, connstr, "true") + + # Subscribing to multiple publications. 
+ publisher.safe_psql( + "CREATE PUBLICATION tap_pub1 FOR ALL TABLES EXCEPT (TABLE tab1);" + ) + _test_multi_publication( + publisher, subscriber, connstr, "CREATE PUBLICATION tap_pub2 FOR TABLE tab1;" + ) + publisher.safe_psql("DROP PUBLICATION tap_pub2;\nTRUNCATE tab1;") + subscriber.safe_psql("TRUNCATE tab1") + + _test_multi_publication( + publisher, subscriber, connstr, "CREATE PUBLICATION tap_pub2 FOR ALL TABLES;" + ) + + subscriber.safe_psql("DROP SUBSCRIPTION tap_sub") + publisher.safe_psql("DROP PUBLICATION tap_pub1") + publisher.safe_psql("DROP PUBLICATION tap_pub2") + publisher.stop("fast") diff --git a/src/test/subscription/pyt/test_038_walsnd_shutdown_timeout.py b/src/test/subscription/pyt/test_038_walsnd_shutdown_timeout.py new file mode 100644 index 0000000000000..f2bcb23a0cf41 --- /dev/null +++ b/src/test/subscription/pyt/test_038_walsnd_shutdown_timeout.py @@ -0,0 +1,169 @@ +# Copyright (c) 2026, PostgreSQL Global Development Group + +"""Port of src/test/subscription/t/038_walsnd_shutdown_timeout.pl. + +Checks that the publisher is able to shut down without waiting for sending of +all pending data to the subscriber when wal_sender_shutdown_timeout is set. 
+""" + +import os +import signal +import time + +import pypg + +_SHUTDOWN_WARNING = ( + r"WARNING: .* terminating walsender process due to " + + r"replication shutdown timeout" +) + + +def _wait_for_full_output_buffer(node): + """Wait until the logical walsender's send position stops advancing.""" + last_sent_lsn = node.safe_psql( + "SELECT sent_lsn FROM pg_stat_replication " + "WHERE application_name = 'test_sub';" + ) + max_attempts = pypg.test_timeout_default() * 10 + while max_attempts >= 0: + max_attempts -= 1 + time.sleep(0.1) + cur_sent_lsn = node.safe_psql( + "SELECT sent_lsn FROM pg_stat_replication " + "WHERE application_name = 'test_sub';" + ) + diff = node.safe_psql( + "SELECT pg_wal_lsn_diff('{}', '{}');".format(cur_sent_lsn, last_sent_lsn) + ) + if diff == "0": + break + last_sent_lsn = cur_sent_lsn + + +def _stop_and_check_timeout(node, msg): + """Fast-stop node and assert the shutdown-timeout warning was logged.""" + log_offset = node.current_log_position() + node.stop("fast") + assert node.log_matches(_SHUTDOWN_WARNING, log_offset), msg + + +def test_038_walsnd_shutdown_timeout(create_pg): + """Publisher shuts down via wal_sender_shutdown_timeout when stalled.""" + node_publisher = create_pg("publisher", allows_streaming="logical", start=False) + node_publisher.append_conf( + "wal_sender_timeout = 1h\nwal_sender_shutdown_timeout = 10ms" + ) + node_publisher.start() + + node_subscriber = create_pg("subscriber") + + node_publisher.safe_psql( + "CREATE TABLE test_tab (id int PRIMARY KEY);\n" + "CREATE PUBLICATION test_pub FOR TABLE test_tab;" + ) + + publisher_connstr = node_publisher.connstr() + " dbname=postgres" + node_subscriber.safe_psql( + "CREATE TABLE test_tab (id int PRIMARY KEY);\n" + "CREATE SUBSCRIPTION test_sub CONNECTION '{}' " + "PUBLICATION test_pub WITH (failover = true);".format(publisher_connstr) + ) + + node_subscriber.wait_for_subscription_sync(node_publisher, "test_sub") + + # Background session on the subscriber that will block 
the apply worker. + sub_session = node_subscriber.background_psql("postgres") + + # Conflicting transactions block the apply worker on a lock, stalling + # replication; shutting down the publisher must exit the walsender via + # wal_sender_shutdown_timeout. + sub_session.query_safe("BEGIN; INSERT INTO test_tab VALUES (0);") + node_publisher.safe_psql("INSERT INTO test_tab VALUES (0);") + + _stop_and_check_timeout( + node_publisher, "walsender exits due to wal_sender_shutdown_timeout" + ) + + sub_session.query_safe("ABORT;") + node_publisher.start() + node_publisher.wait_for_catchup("test_sub") + + # Same, but with the walsender's output buffer full. + sub_session.query_safe("BEGIN; LOCK TABLE test_tab IN EXCLUSIVE MODE;") + node_publisher.safe_psql("INSERT INTO test_tab VALUES (generate_series(1, 20000));") + _wait_for_full_output_buffer(node_publisher) + + _stop_and_check_timeout( + node_publisher, + "walsender with full output buffer exits due to " + + "wal_sender_shutdown_timeout", + ) + + sub_session.query_safe("ABORT;") + node_publisher.start() + + # Both physical and logical replication active, with slot sync on the + # standby; stall both and confirm shutdown still completes via timeout. + node_publisher.backup( + "publisher_backup", + backup_options=[ + "--create-slot", + "--slot", + "test_slot", + "-d", + "dbname=postgres", + "--write-recovery-conf", + ], + ) + + node_publisher.append_conf("synchronized_standby_slots = 'test_slot'") + node_publisher.reload() + + node_standby = create_pg( + "standby", + from_backup=(node_publisher, "publisher_backup"), + start=False, + ) + # The backup was taken with --write-recovery-conf, so primary_conninfo and + # primary_slot_name are already in postgresql.auto.conf; re-place the + # standby.signal that init_from_backup strips so the standby starts in + # standby mode and connects a walreceiver. 
+ node_standby.set_standby_mode() + node_standby.append_conf("sync_replication_slots = on\nhot_standby_feedback = on") + node_standby.start() + + node_publisher.wait_for_catchup("test_sub") + sub_session.query_safe("BEGIN; LOCK TABLE test_tab IN EXCLUSIVE MODE;") + node_publisher.safe_psql("INSERT INTO test_tab VALUES (-1); ") + + # The remaining scenario stalls physical replication by sending SIGSTOP to + # the standby's walreceiver, which is not portable to Windows; end the test + # here on that platform. + if os.name == "nt": + sub_session.quit() + node_subscriber.stop("fast") + node_standby.stop("fast") + return + + # Block the standby's walreceiver with SIGSTOP, stalling physical + # replication. + assert node_standby.poll_query_until( + "SELECT EXISTS(SELECT 1 FROM pg_stat_wal_receiver)" + ) + receiverpid = node_standby.safe_psql("SELECT pid FROM pg_stat_wal_receiver") + assert receiverpid.isdigit(), "have walreceiver pid {}".format(receiverpid) + os.kill(int(receiverpid), signal.SIGSTOP) + + log_offset = node_publisher.current_log_position() + node_publisher.safe_psql("INSERT INTO test_tab VALUES (-2);") + node_publisher.stop("fast") + assert node_publisher.log_matches(_SHUTDOWN_WARNING, log_offset), ( + "walsender exits due to wal_sender_shutdown_timeout even when both " + "physical and logical replication are stalled" + ) + + os.kill(int(receiverpid), signal.SIGCONT) + sub_session.quit() + + node_subscriber.stop("fast") + node_standby.stop("fast") diff --git a/src/test/subscription/pyt/test_100_bugs.py b/src/test/subscription/pyt/test_100_bugs.py new file mode 100644 index 0000000000000..60842f69cff88 --- /dev/null +++ b/src/test/subscription/pyt/test_100_bugs.py @@ -0,0 +1,425 @@ +# Copyright (c) 2021-2026, PostgreSQL Global Development Group + +"""Port of src/test/subscription/t/100_bugs.pl. 
+ +Regression tests for assorted logical-replication bugs found over time: index +predicates needing a snapshot (#15114), temp/unlogged tables under FOR ALL +TABLES, initial-sync protocol (#16643) and cascaded sync, REPLICA IDENTITY +index/relcache invalidation, schema-rename invalidation, REPLICA IDENTITY FULL +with dropped/missing columns, create+drop of a replication slot via replication +commands, origin advancement when a trigger swallows an ERROR, and the DROP +SUBSCRIPTION self-deadlock (#18988). +""" + +import pypg + + +def _test_index_predicate_crash(create_pg): + """#15114: index predicates with const-expressions must not crash apply.""" + publisher = create_pg("publisher", allows_streaming="logical") + subscriber = create_pg("subscriber") + connstr = publisher.connstr() + " dbname=postgres" + + for node in (publisher, subscriber): + node.safe_psql("CREATE TABLE tab1 (a int PRIMARY KEY, b int)") + node.safe_psql( + "CREATE FUNCTION double(x int) RETURNS int IMMUTABLE LANGUAGE SQL " + "AS 'select x * 2'" + ) + node.safe_psql("CREATE INDEX ON tab1 (b) WHERE a > double(1)") + + publisher.safe_psql("CREATE PUBLICATION pub1 FOR ALL TABLES") + subscriber.safe_psql( + "CREATE SUBSCRIPTION sub1 CONNECTION '{}' PUBLICATION pub1".format(connstr) + ) + publisher.wait_for_catchup("sub1") + publisher.safe_psql("INSERT INTO tab1 VALUES (1, 2)") + publisher.wait_for_catchup("sub1") + + subscriber.safe_psql("DROP SUBSCRIPTION sub1") + publisher.safe_psql("DROP PUBLICATION pub1") + publisher.safe_psql("DROP TABLE tab1") + publisher.stop("fast") + subscriber.stop("fast") + return publisher, subscriber + + +def _test_temp_unlogged_for_all_tables(publisher, subscriber): + """Temp/unlogged tables are ignored by FOR ALL TABLES (no RI error).""" + publisher.rotate_logfile() + publisher.start() + subscriber.rotate_logfile() + + publisher.safe_psql("CREATE PUBLICATION pub FOR ALL TABLES") + assert ( + publisher.psql_capture( + "CREATE TEMPORARY TABLE tt1 AS SELECT 1 AS a; UPDATE 
tt1 SET a = 2;" + ).rc + == 0 + ), "update to temporary table without replica identity" + assert ( + publisher.psql_capture( + "CREATE UNLOGGED TABLE tu1 AS SELECT 1 AS a; UPDATE tu1 SET a = 2;" + ).rc + == 0 + ), "update to unlogged table without replica identity" + publisher.safe_psql("DROP PUBLICATION pub") + publisher.stop("fast") + + +def _test_initial_sync_protocol(create_pg): + """#16643: initial sync of an added table completes under two-way load.""" + node = create_pg("twoways", allows_streaming="logical") + for db in ("d1", "d2"): + node.safe_psql("CREATE DATABASE {}".format(db)) + node.safe_psql("CREATE TABLE t (f int)", dbname=db) + node.safe_psql("CREATE TABLE t2 (f int)", dbname=db) + rows = 3000 + node.safe_psql( + "INSERT INTO t SELECT * FROM generate_series(1, {n});\n" + "INSERT INTO t2 SELECT * FROM generate_series(1, {n});\n" + "CREATE PUBLICATION testpub FOR TABLE t;\n" + "SELECT pg_create_logical_replication_slot('testslot', 'pgoutput');".format( + n=rows + ), + dbname="d1", + ) + node.safe_psql( + "CREATE SUBSCRIPTION testsub CONNECTION $${}$$ " + "PUBLICATION testpub WITH (create_slot=false, " + "slot_name='testslot')".format(node.connstr("d1")), + dbname="d2", + ) + node.safe_psql( + "INSERT INTO t SELECT * FROM generate_series(1, {n});\n" + "INSERT INTO t2 SELECT * FROM generate_series(1, {n});".format(n=rows), + dbname="d1", + ) + node.safe_psql("ALTER PUBLICATION testpub ADD TABLE t2", dbname="d1") + node.safe_psql("ALTER SUBSCRIPTION testsub REFRESH PUBLICATION", dbname="d2") + node.wait_for_subscription_sync(node, "testsub", dbname="d2") + assert node.safe_psql("SELECT count(f) FROM t", dbname="d2") == str( + rows * 2 + ), "2x{} rows in t".format(rows) + assert node.safe_psql("SELECT count(f) FROM t2", dbname="d2") == str( + rows * 2 + ), "2x{} rows in t2".format(rows) + node.stop("fast") + + +def _test_cascaded_sync(create_pg): + """Tablesync-written data replicates through a cascaded pub/sub setup.""" + node_pub = 
create_pg("testpublisher1", allows_streaming="logical") + node_pub_sub = create_pg("testpublisher_subscriber", allows_streaming="logical") + node_sub = create_pg("testsubscriber1") + + for node in (node_pub, node_pub_sub, node_sub): + node.safe_psql("CREATE TABLE tab1 (a int)") + + node_pub.safe_psql("CREATE PUBLICATION testpub1 FOR TABLE tab1") + node_pub_sub.safe_psql("CREATE PUBLICATION testpub2 FOR TABLE tab1") + pub1 = node_pub.connstr() + " dbname=postgres" + pub2 = node_pub_sub.connstr() + " dbname=postgres" + + # testsub2 must be created before testsub1 so that the data written by + # testsub1's tablesync worker also gets replicated to testsub2. + node_sub.safe_psql( + "CREATE SUBSCRIPTION testsub2 CONNECTION '{}' PUBLICATION testpub2".format(pub2) + ) + node_pub_sub.safe_psql( + "CREATE SUBSCRIPTION testsub1 CONNECTION '{}' PUBLICATION testpub1".format(pub1) + ) + node_pub.safe_psql("INSERT INTO tab1 values(generate_series(1,10))") + node_pub.wait_for_catchup("testsub1") + node_pub_sub.wait_for_catchup("testsub2") + + node_pub_sub.safe_psql("DROP SUBSCRIPTION testsub1") + node_sub.safe_psql("DROP SUBSCRIPTION testsub2") + node_pub.safe_psql("DROP PUBLICATION testpub1") + node_pub_sub.safe_psql("DROP PUBLICATION testpub2") + for node in (node_pub, node_pub_sub, node_sub): + node.safe_psql("DROP TABLE tab1") + node.stop("fast") + + +def _test_replica_identity_index(publisher, subscriber): + """Changing the REPLICA IDENTITY index invalidates the target relcache.""" + publisher.rotate_logfile() + publisher.start() + subscriber.rotate_logfile() + subscriber.start() + + for node in (publisher, subscriber): + node.safe_psql( + "CREATE TABLE tab_replidentity_index(a int not null, b int not null)" + ) + node.safe_psql( + "CREATE UNIQUE INDEX idx_replidentity_index_a " + "ON tab_replidentity_index(a)" + ) + node.safe_psql( + "CREATE UNIQUE INDEX idx_replidentity_index_b " + "ON tab_replidentity_index(b)" + ) + publisher.safe_psql( + "ALTER TABLE 
tab_replidentity_index REPLICA IDENTITY " + "USING INDEX idx_replidentity_index_a" + ) + publisher.safe_psql("INSERT INTO tab_replidentity_index VALUES(1, 1),(2, 2)") + subscriber.safe_psql( + "ALTER TABLE tab_replidentity_index REPLICA IDENTITY " + "USING INDEX idx_replidentity_index_b" + ) + + connstr = publisher.connstr() + " dbname=postgres" + publisher.safe_psql("CREATE PUBLICATION tap_pub FOR TABLE tab_replidentity_index") + subscriber.safe_psql( + "CREATE SUBSCRIPTION tap_sub CONNECTION '{}' PUBLICATION tap_pub".format( + connstr + ) + ) + subscriber.wait_for_subscription_sync(publisher, "tap_sub") + assert ( + subscriber.safe_psql("SELECT * FROM tab_replidentity_index") == "1|1\n2|2" + ), "check initial data on subscriber" + + publisher.safe_psql( + "ALTER TABLE tab_replidentity_index REPLICA IDENTITY " + "USING INDEX idx_replidentity_index_b;\n" + "UPDATE tab_replidentity_index SET a = -a WHERE a = 1;\n" + "DELETE FROM tab_replidentity_index WHERE a = 2;" + ) + publisher.wait_for_catchup("tap_sub") + assert ( + subscriber.safe_psql("SELECT * FROM tab_replidentity_index") == "-1|1" + ), "update works with REPLICA IDENTITY" + + subscriber.safe_psql("DROP SUBSCRIPTION tap_sub") + publisher.safe_psql("DROP PUBLICATION tap_pub") + publisher.safe_psql("DROP TABLE tab_replidentity_index") + subscriber.safe_psql("DROP TABLE tab_replidentity_index") + + +def _test_schema_rename(publisher, subscriber): + """Renaming a schema invalidates replication mapping as expected.""" + connstr = publisher.connstr() + " dbname=postgres" + publisher.safe_psql("CREATE SCHEMA sch1") + publisher.safe_psql("CREATE TABLE sch1.t1 (c1 int)") + subscriber.safe_psql("CREATE SCHEMA sch1") + subscriber.safe_psql("CREATE TABLE sch1.t1 (c1 int)") + subscriber.safe_psql("CREATE SCHEMA sch2") + subscriber.safe_psql("CREATE TABLE sch2.t1 (c1 int)") + + publisher.safe_psql("CREATE PUBLICATION tap_pub_sch FOR ALL TABLES") + subscriber.safe_psql( + "CREATE SUBSCRIPTION tap_sub_sch CONNECTION '{}' " 
+ "PUBLICATION tap_pub_sch".format(connstr) + ) + subscriber.wait_for_subscription_sync(publisher, "tap_sub_sch") + + publisher.safe_psql( + "begin;\n" + "insert into sch1.t1 values(1);\n" + "alter schema sch1 rename to sch2;\n" + "create schema sch1;\n" + "create table sch1.t1(c1 int);\n" + "insert into sch1.t1 values(2);\n" + "insert into sch2.t1 values(3);\n" + "commit;" + ) + subscriber.wait_for_subscription_sync(publisher, "tap_sub_sch") + assert ( + subscriber.safe_psql("SELECT * FROM sch1.t1") == "1\n2" + ), "check data in subscriber sch1.t1 after schema rename" + assert ( + subscriber.safe_psql("SELECT * FROM sch2.t1") == "" + ), "no data yet in subscriber sch2.t1 after schema rename" + + subscriber.safe_psql("ALTER SUBSCRIPTION tap_sub_sch REFRESH PUBLICATION") + subscriber.wait_for_subscription_sync(publisher, "tap_sub_sch") + assert ( + subscriber.safe_psql("SELECT * FROM sch2.t1") == "1\n3" + ), "check data in subscriber sch2.t1 after schema rename" + + subscriber.safe_psql("DROP SUBSCRIPTION tap_sub_sch") + publisher.safe_psql("DROP PUBLICATION tap_pub_sch") + publisher.stop("fast") + subscriber.stop("fast") + + +def _test_ri_full_dropped_columns(publisher, subscriber): + """REPLICA IDENTITY FULL with a dropped column still applies updates.""" + publisher.rotate_logfile() + publisher.start() + subscriber.rotate_logfile() + subscriber.start() + + publisher.safe_psql( + "CREATE TABLE dropped_cols (a int, b_drop int, c int);\n" + "ALTER TABLE dropped_cols REPLICA IDENTITY FULL;\n" + "CREATE PUBLICATION pub_dropped_cols FOR TABLE dropped_cols;\n" + "INSERT INTO dropped_cols VALUES (1, 1, 1);" + ) + subscriber.safe_psql("CREATE TABLE dropped_cols (a int, b_drop int, c int);") + connstr = publisher.connstr() + " dbname=postgres" + subscriber.safe_psql( + "CREATE SUBSCRIPTION sub_dropped_cols CONNECTION '{}' " + "PUBLICATION pub_dropped_cols".format(connstr) + ) + subscriber.wait_for_subscription_sync() + publisher.safe_psql("ALTER TABLE dropped_cols DROP 
COLUMN b_drop;") + subscriber.safe_psql("ALTER TABLE dropped_cols DROP COLUMN b_drop;") + publisher.safe_psql("UPDATE dropped_cols SET a = 100;") + publisher.wait_for_catchup("sub_dropped_cols") + assert ( + subscriber.safe_psql("SELECT count(*) FROM dropped_cols WHERE a = 100") == "1" + ), "replication with RI FULL and dropped columns" + publisher.stop("fast") + subscriber.stop("fast") + + +def _test_missing_attribute(publisher, subscriber): + """pgoutput must not replace a missing attribute with NULL (RI FULL).""" + publisher.rotate_logfile() + publisher.start() + subscriber.rotate_logfile() + subscriber.start() + + connstr = publisher.connstr() + " dbname=postgres" + publisher.safe_psql( + "CREATE TABLE tab_default (a int);\n" + "ALTER TABLE tab_default REPLICA IDENTITY FULL;\n" + "INSERT INTO tab_default VALUES (1);\n" + "ALTER TABLE tab_default ADD COLUMN b bool DEFAULT false NOT NULL;\n" + "INSERT INTO tab_default VALUES (2, true);\n" + "CREATE PUBLICATION pub1 FOR TABLE tab_default;" + ) + subscriber.safe_psql( + "CREATE TABLE tab_default (a int, b bool);\n" + "CREATE SUBSCRIPTION sub1 CONNECTION '{}' PUBLICATION pub1;".format(connstr) + ) + subscriber.wait_for_subscription_sync(publisher, "sub1") + assert ( + subscriber.safe_psql("SELECT a, b FROM tab_default") == "1|f\n2|t" + ), "check snapshot on subscriber" + + publisher.safe_psql("UPDATE tab_default SET a = a + 1") + publisher.wait_for_catchup("sub1") + assert ( + subscriber.safe_psql("SELECT a, b FROM tab_default") == "2|f\n3|t" + ), "check replicated update on subscriber" + + +def _test_replication_slot_commands(publisher, subscriber): + """Create and immediately drop a logical slot via replication commands.""" + connstr_db = "host={} port={} replication=database dbname=postgres".format( + publisher.host, publisher.port + ) + result = publisher.psql_capture( + "CREATE_REPLICATION_SLOT test_slot LOGICAL pgoutput (SNAPSHOT export);\n" + "DROP_REPLICATION_SLOT test_slot;\n", + on_error_stop=False, + 
extra_params=["-d", connstr_db], + timeout=pypg.test_timeout_default(), + ) + assert result.rc == 0, "create and immediate drop of replication slot" + publisher.stop("fast") + subscriber.stop("fast") + + +def _test_origin_advance_on_caught_error(publisher, subscriber): + """Origin advances even when a trigger catches the apply-time ERROR.""" + publisher.rotate_logfile() + publisher.start() + subscriber.rotate_logfile() + subscriber.start() + + connstr = publisher.connstr() + " dbname=postgres" + publisher.safe_psql( + "CREATE TABLE t1 (a int);\nCREATE PUBLICATION regress_pub FOR TABLE t1;" + ) + subscriber.safe_psql( + "CREATE TABLE t1 (a int);\n" + "CREATE SUBSCRIPTION regress_sub CONNECTION '{}' " + "PUBLICATION regress_pub;".format(connstr) + ) + subscriber.wait_for_subscription_sync(publisher, "regress_sub") + subscriber.safe_psql(_EXCEPTION_TRIGGER_SQL) + + origin_query = ( + "SELECT remote_lsn FROM pg_replication_origin_status os, " + "pg_subscription s WHERE os.external_id = 'pg_' || s.oid " + "AND s.subname = 'regress_sub'" + ) + remote_lsn = subscriber.safe_psql(origin_query) + publisher.safe_psql("INSERT INTO t1 VALUES (1);") + publisher.wait_for_catchup("regress_sub") + assert ( + subscriber.safe_psql( + "SELECT remote_lsn > '{}' FROM pg_replication_origin_status os, " + "pg_subscription s WHERE os.external_id = 'pg_' || s.oid " + "AND s.subname = 'regress_sub'".format(remote_lsn) + ) + == "t" + ), "remote_lsn has advanced for apply worker raising an exception" + publisher.stop("fast") + subscriber.stop("fast") + + +_EXCEPTION_TRIGGER_SQL = """\ +CREATE FUNCTION handle_exception_trigger() +RETURNS TRIGGER AS $$ +BEGIN + BEGIN + -- Raise an exception + RAISE EXCEPTION 'This is a test exception'; + EXCEPTION + WHEN OTHERS THEN + RETURN NEW; + END; + + RETURN NEW; +END; +$$ LANGUAGE plpgsql; + +CREATE TRIGGER silent_exception_trigger +AFTER INSERT OR UPDATE ON t1 +FOR EACH ROW +EXECUTE FUNCTION handle_exception_trigger(); + +ALTER TABLE t1 ENABLE ALWAYS 
TRIGGER silent_exception_trigger; +""" + + +def _test_drop_subscription_deadlock(publisher): + """#18988: DROP SUBSCRIPTION on a fresh db must not self-deadlock.""" + publisher.start() + connstr = publisher.connstr() + " dbname=regress_db" + publisher.safe_psql( + "CREATE DATABASE regress_db;\n" + "CREATE SUBSCRIPTION regress_sub1 CONNECTION '{}' " + "PUBLICATION regress_pub WITH (connect=false);".format(connstr) + ) + result = publisher.psql_capture("DROP SUBSCRIPTION regress_sub1") + assert result.rc != 0, "replication slot does not exist: exit code not 0" + assert ( + 'ERROR: could not drop replication slot "regress_sub1" on publisher' + in result.stderr + ), "could not drop replication slot: error message" + publisher.safe_psql("DROP DATABASE regress_db") + publisher.stop("fast") + + +def test_100_bugs(create_pg): + """Assorted logical-replication bug regressions.""" + publisher, subscriber = _test_index_predicate_crash(create_pg) + _test_temp_unlogged_for_all_tables(publisher, subscriber) + _test_initial_sync_protocol(create_pg) + _test_cascaded_sync(create_pg) + _test_replica_identity_index(publisher, subscriber) + _test_schema_rename(publisher, subscriber) + _test_ri_full_dropped_columns(publisher, subscriber) + _test_missing_attribute(publisher, subscriber) + _test_replication_slot_commands(publisher, subscriber) + _test_origin_advance_on_caught_error(publisher, subscriber) + _test_drop_subscription_deadlock(publisher) From 68f9dd90e7845f869d527d2a275ca00b66d1f055 Mon Sep 17 00:00:00 2001 From: Greg Burd Date: Sun, 14 Jun 2026 11:16:34 -0400 Subject: [PATCH 15/36] pytest: port the src/test/modules TAP suites Port the test-module TAP suites (commit_ts, test_checksums, test_misc, test_json_parser, worker_spi, injection-point and custom-rmgr modules, oauth_validator, libpq_pipeline, and others) to pytest. 
Co-authored-by: Greg Burd --- src/test/modules/brin/meson.build | 6 + .../modules/brin/pyt/test_01_workitems.py | 54 + .../brin/pyt/test_02_wal_consistency.py | 47 + src/test/modules/commit_ts/meson.build | 8 + .../modules/commit_ts/pyt/test_001_base.py | 31 + .../modules/commit_ts/pyt/test_002_standby.py | 53 + .../commit_ts/pyt/test_003_standby_2.py | 53 + .../modules/commit_ts/pyt/test_004_restart.py | 103 ++ .../modules/ldap_password_func/meson.build | 6 + .../ldap_password_func/pyt/conftest.py | 14 + .../pyt/test_001_mutated_bindpasswd.py | 105 ++ src/test/modules/libpq_pipeline/meson.build | 6 + .../pyt/test_001_libpq_pipeline.py | 65 + src/test/modules/oauth_validator/meson.build | 13 + .../modules/oauth_validator/pyt/conftest.py | 24 + .../oauth_validator/pyt/test_001_server.py | 875 ++++++++++ .../oauth_validator/pyt/test_002_client.py | 307 ++++ .../ssl_passphrase_callback/meson.build | 6 + .../pyt/test_001_testfunc.py | 80 + src/test/modules/test_aio/meson.build | 11 + src/test/modules/test_aio/pyt/test_001_aio.py | 1530 +++++++++++++++++ .../test_aio/pyt/test_002_io_workers.py | 105 ++ .../modules/test_aio/pyt/test_003_initdb.py | 59 + .../test_aio/pyt/test_004_read_stream.py | 242 +++ src/test/modules/test_aio/pyt/testaio.py | 82 + src/test/modules/test_autovacuum/meson.build | 8 + .../pyt/test_001_parallel_autovacuum.py | 102 ++ src/test/modules/test_checksums/meson.build | 16 + .../test_checksums/pyt/datachecksums_utils.py | 76 + .../test_checksums/pyt/test_001_basic.py | 33 + .../test_checksums/pyt/test_002_restarts.py | 63 + .../pyt/test_003_standby_restarts.py | 185 ++ .../test_checksums/pyt/test_004_offline.py | 42 + .../test_checksums/pyt/test_005_injection.py | 49 + .../pyt/test_006_pgbench_single.py | 163 ++ .../pyt/test_007_pgbench_standby.py | 186 ++ .../test_checksums/pyt/test_008_pitr.py | 136 ++ .../test_checksums/pyt/test_009_fpi.py | 41 + src/test/modules/test_cloexec/meson.build | 6 + .../test_cloexec/pyt/test_001_cloexec.py | 22 + 
.../modules/test_custom_rmgrs/meson.build | 5 + .../test_custom_rmgrs/pyt/test_001_basic.py | 50 + .../modules/test_custom_stats/meson.build | 5 + .../pyt/test_001_custom_stats.py | 86 + src/test/modules/test_escape/meson.build | 6 + .../test_escape/pyt/test_001_test_escape.py | 36 + src/test/modules/test_extensions/meson.build | 5 + .../pyt/test_001_extension_control_path.py | 137 ++ src/test/modules/test_int128/meson.build | 6 + .../test_int128/pyt/test_001_test_int128.py | 20 + src/test/modules/test_json_parser/meson.build | 13 + .../test_001_test_json_parser_incremental.py | 34 + .../test_json_parser/pyt/test_002_inline.py | 150 ++ .../pyt/test_003_test_semantic.py | 28 + .../pyt/test_004_test_parser_perf.py | 24 + src/test/modules/test_misc/meson.build | 20 + .../pyt/test_001_constraint_validation.py | 380 ++++ .../test_misc/pyt/test_002_tablespace.py | 85 + .../test_misc/pyt/test_003_check_guc.py | 64 + .../test_misc/pyt/test_004_io_direct.py | 62 + .../test_misc/pyt/test_005_timeouts.py | 71 + .../pyt/test_006_signal_autovacuum.py | 114 ++ .../test_misc/pyt/test_007_catcache_inval.py | 51 + .../pyt/test_008_replslot_single_user.py | 108 ++ .../test_misc/pyt/test_009_log_temp_files.py | 102 ++ .../pyt/test_010_index_concurrently_upsert.py | 956 ++++++++++ .../test_misc/pyt/test_011_lock_stats.py | 329 ++++ .../test_misc/pyt/test_012_ddlutils.py | 355 ++++ .../pyt/test_013_temp_obj_multisession.py | 307 ++++ src/test/modules/test_pg_dump/meson.build | 5 + .../modules/test_pg_dump/pyt/test_001_base.py | 1359 +++++++++++++++ src/test/modules/test_plan_advice/meson.build | 6 + .../pyt/test_001_replan_regress.py | 50 + src/test/modules/test_saslprep/meson.build | 5 + .../pyt/test_001_saslprep_ranges.py | 28 + src/test/modules/test_shmem/meson.build | 5 + .../pyt/test_001_late_shmem_alloc.py | 37 + src/test/modules/test_slru/meson.build | 6 + .../test_slru/pyt/test_001_multixact.py | 43 + .../pyt/test_002_multixact_wraparound.py | 49 + 
src/test/modules/worker_spi/meson.build | 9 + .../worker_spi/pyt/test_001_worker_spi.py | 130 ++ .../pyt/test_002_worker_terminate.py | 107 ++ src/test/modules/xid_wraparound/meson.build | 8 + .../pyt/test_001_emergency_vacuum.py | 103 ++ .../xid_wraparound/pyt/test_002_limits.py | 69 + .../pyt/test_003_wraparounds.py | 39 + .../pyt/test_004_notify_freeze.py | 56 + src/test/pytest/pypg/oauthserver.py | 91 + 89 files changed, 10827 insertions(+) create mode 100644 src/test/modules/brin/pyt/test_01_workitems.py create mode 100644 src/test/modules/brin/pyt/test_02_wal_consistency.py create mode 100644 src/test/modules/commit_ts/pyt/test_001_base.py create mode 100644 src/test/modules/commit_ts/pyt/test_002_standby.py create mode 100644 src/test/modules/commit_ts/pyt/test_003_standby_2.py create mode 100644 src/test/modules/commit_ts/pyt/test_004_restart.py create mode 100644 src/test/modules/ldap_password_func/pyt/conftest.py create mode 100644 src/test/modules/ldap_password_func/pyt/test_001_mutated_bindpasswd.py create mode 100644 src/test/modules/libpq_pipeline/pyt/test_001_libpq_pipeline.py create mode 100644 src/test/modules/oauth_validator/pyt/conftest.py create mode 100644 src/test/modules/oauth_validator/pyt/test_001_server.py create mode 100644 src/test/modules/oauth_validator/pyt/test_002_client.py create mode 100644 src/test/modules/ssl_passphrase_callback/pyt/test_001_testfunc.py create mode 100644 src/test/modules/test_aio/pyt/test_001_aio.py create mode 100644 src/test/modules/test_aio/pyt/test_002_io_workers.py create mode 100644 src/test/modules/test_aio/pyt/test_003_initdb.py create mode 100644 src/test/modules/test_aio/pyt/test_004_read_stream.py create mode 100644 src/test/modules/test_aio/pyt/testaio.py create mode 100644 src/test/modules/test_autovacuum/pyt/test_001_parallel_autovacuum.py create mode 100644 src/test/modules/test_checksums/pyt/datachecksums_utils.py create mode 100644 src/test/modules/test_checksums/pyt/test_001_basic.py create 
mode 100644 src/test/modules/test_checksums/pyt/test_002_restarts.py create mode 100644 src/test/modules/test_checksums/pyt/test_003_standby_restarts.py create mode 100644 src/test/modules/test_checksums/pyt/test_004_offline.py create mode 100644 src/test/modules/test_checksums/pyt/test_005_injection.py create mode 100644 src/test/modules/test_checksums/pyt/test_006_pgbench_single.py create mode 100644 src/test/modules/test_checksums/pyt/test_007_pgbench_standby.py create mode 100644 src/test/modules/test_checksums/pyt/test_008_pitr.py create mode 100644 src/test/modules/test_checksums/pyt/test_009_fpi.py create mode 100644 src/test/modules/test_cloexec/pyt/test_001_cloexec.py create mode 100644 src/test/modules/test_custom_rmgrs/pyt/test_001_basic.py create mode 100644 src/test/modules/test_custom_stats/pyt/test_001_custom_stats.py create mode 100644 src/test/modules/test_escape/pyt/test_001_test_escape.py create mode 100644 src/test/modules/test_extensions/pyt/test_001_extension_control_path.py create mode 100644 src/test/modules/test_int128/pyt/test_001_test_int128.py create mode 100644 src/test/modules/test_json_parser/pyt/test_001_test_json_parser_incremental.py create mode 100644 src/test/modules/test_json_parser/pyt/test_002_inline.py create mode 100644 src/test/modules/test_json_parser/pyt/test_003_test_semantic.py create mode 100644 src/test/modules/test_json_parser/pyt/test_004_test_parser_perf.py create mode 100644 src/test/modules/test_misc/pyt/test_001_constraint_validation.py create mode 100644 src/test/modules/test_misc/pyt/test_002_tablespace.py create mode 100644 src/test/modules/test_misc/pyt/test_003_check_guc.py create mode 100644 src/test/modules/test_misc/pyt/test_004_io_direct.py create mode 100644 src/test/modules/test_misc/pyt/test_005_timeouts.py create mode 100644 src/test/modules/test_misc/pyt/test_006_signal_autovacuum.py create mode 100644 src/test/modules/test_misc/pyt/test_007_catcache_inval.py create mode 100644 
src/test/modules/test_misc/pyt/test_008_replslot_single_user.py create mode 100644 src/test/modules/test_misc/pyt/test_009_log_temp_files.py create mode 100644 src/test/modules/test_misc/pyt/test_010_index_concurrently_upsert.py create mode 100644 src/test/modules/test_misc/pyt/test_011_lock_stats.py create mode 100644 src/test/modules/test_misc/pyt/test_012_ddlutils.py create mode 100644 src/test/modules/test_misc/pyt/test_013_temp_obj_multisession.py create mode 100644 src/test/modules/test_pg_dump/pyt/test_001_base.py create mode 100644 src/test/modules/test_plan_advice/pyt/test_001_replan_regress.py create mode 100644 src/test/modules/test_saslprep/pyt/test_001_saslprep_ranges.py create mode 100644 src/test/modules/test_shmem/pyt/test_001_late_shmem_alloc.py create mode 100644 src/test/modules/test_slru/pyt/test_001_multixact.py create mode 100644 src/test/modules/test_slru/pyt/test_002_multixact_wraparound.py create mode 100644 src/test/modules/worker_spi/pyt/test_001_worker_spi.py create mode 100644 src/test/modules/worker_spi/pyt/test_002_worker_terminate.py create mode 100644 src/test/modules/xid_wraparound/pyt/test_001_emergency_vacuum.py create mode 100644 src/test/modules/xid_wraparound/pyt/test_002_limits.py create mode 100644 src/test/modules/xid_wraparound/pyt/test_003_wraparounds.py create mode 100644 src/test/modules/xid_wraparound/pyt/test_004_notify_freeze.py create mode 100644 src/test/pytest/pypg/oauthserver.py diff --git a/src/test/modules/brin/meson.build b/src/test/modules/brin/meson.build index 39a8b2fc925c0..f9f387de9e562 100644 --- a/src/test/modules/brin/meson.build +++ b/src/test/modules/brin/meson.build @@ -9,6 +9,12 @@ tests += { 'summarization-and-inprogress-insertion', ], }, + 'pytest': { + 'tests': [ + 'pyt/test_01_workitems.py', + 'pyt/test_02_wal_consistency.py', + ], + }, 'tap': { 'tests': [ 't/01_workitems.pl', diff --git a/src/test/modules/brin/pyt/test_01_workitems.py b/src/test/modules/brin/pyt/test_01_workitems.py new file 
mode 100644 index 0000000000000..5a619fad9adfc --- /dev/null +++ b/src/test/modules/brin/pyt/test_01_workitems.py @@ -0,0 +1,54 @@ +# Copyright (c) 2022-2026, PostgreSQL Global Development Group + +# pylint: disable=line-too-long,too-many-statements +"""Port of src/test/modules/brin/t/01_workitems.pl. + +BRIN autosummarization work-items: autovacuum processes queued BRIN summarization requests so that index ranges get summarized. +Generated from the Perl original via .agent/gen_golden.py. +""" + +import time + + +def test_01_workitems(create_pg): + """BRIN autosummarization work-items.""" + node = create_pg("tango", start=False) + node.append_conf("autovacuum_naptime=1s") + node.start() + node.safe_psql("create extension pageinspect") + node.safe_psql( + "create table brin_wi (a int) with (fillfactor = 10);\n\t create index brin_wi_idx on brin_wi using brin (a) with (pages_per_range=1, autosummarize=on);" + ) + node.safe_psql( + "create table journal (d timestamp) with (fillfactor = 10);\n\t create function packdate(d timestamp) returns text language plpgsql\n\t as $$ begin return to_char(d, 'yyyymm'); end; $$\n\t returns null on null input immutable;\n\t create index brin_packdate_idx on journal using brin (packdate(d))\n\t with (autosummarize = on, pages_per_range = 1);" + ) + count = node.safe_psql( + "select count(*) from brin_page_items(get_raw_page('brin_wi_idx', 2), 'brin_wi_idx'::regclass)" + ) + assert count == "1", "initial brin_wi_idx index state is correct" + count = node.safe_psql( + "select count(*) from brin_page_items(get_raw_page('brin_packdate_idx', 2), 'brin_packdate_idx'::regclass)" + ) + assert count == "1", "initial brin_packdate_idx index state is correct" + node.safe_psql("insert into brin_wi select * from generate_series(1, 100)") + node.safe_psql( + "insert into journal select * from generate_series(timestamp '1976-08-01', '1976-10-28', '1 day')" + ) + time.sleep(1) + node.poll_query_until( + "select count(*) > 1 from 
brin_page_items(get_raw_page('brin_wi_idx', 2), 'brin_wi_idx'::regclass)", + expected="t", + ) + count = node.safe_psql( + "select count(*) from brin_page_items(get_raw_page('brin_wi_idx', 2), 'brin_wi_idx'::regclass)\n\t where not placeholder;" + ) + assert int(count) > 1, "$count brin_wi_idx ranges got summarized" + node.poll_query_until( + "select count(*) > 1 from brin_page_items(get_raw_page('brin_packdate_idx', 2), 'brin_packdate_idx'::regclass)", + expected="t", + ) + count = node.safe_psql( + "select count(*) from brin_page_items(get_raw_page('brin_packdate_idx', 2), 'brin_packdate_idx'::regclass)\n\t where not placeholder;" + ) + assert int(count) > 1, "$count brin_packdate_idx ranges got summarized" + node.stop() diff --git a/src/test/modules/brin/pyt/test_02_wal_consistency.py b/src/test/modules/brin/pyt/test_02_wal_consistency.py new file mode 100644 index 0000000000000..beed6f7868f9a --- /dev/null +++ b/src/test/modules/brin/pyt/test_02_wal_consistency.py @@ -0,0 +1,47 @@ +# Copyright (c) 2022-2026, PostgreSQL Global Development Group + +# pylint: disable=line-too-long,too-many-statements +"""Port of src/test/modules/brin/t/02_wal_consistency.pl. + +BRIN WAL consistency: a BRIN index built and updated on a primary produces revmap WAL records that replay correctly on a streaming standby (wal_consistency_checking). +Generated from the Perl original via .agent/gen_golden.py. 
+""" + + +def test_02_wal_consistency(create_pg): + """BRIN revmap WAL records replay correctly on a standby.""" + whiskey = create_pg("whiskey", allows_streaming=True, start=False) + whiskey.append_conf("wal_consistency_checking = brin") + whiskey.start() + whiskey.safe_psql("create extension pageinspect") + whiskey.safe_psql("create extension pg_walinspect") + assert ( + whiskey.psql_capture( + "SELECT pg_create_physical_replication_slot('standby_1');" + ).rc + == 0 + ), "physical slot created on primary" + backup_name = "brinbkp" + whiskey.backup(backup_name) + charlie = create_pg( + "charlie", from_backup=(whiskey, backup_name), has_streaming=True, start=False + ) + charlie.append_conf("primary_slot_name = standby_1") + charlie.start() + whiskey.safe_psql( + "create table tbl_timestamp0 (d1 timestamp(0) without time zone) with (fillfactor=10);\ncreate index on tbl_timestamp0 using brin (d1) with (pages_per_range = 1, autosummarize=false);" + ) + start_lsn = whiskey.lsn("insert") + whiskey.safe_psql( + "do\n$$\ndeclare\n current timestamp with time zone := '2019-03-27 08:14:01.123456789 UTC';\nbegin\n loop\n insert into tbl_timestamp0 select i from\n generate_series(current, current + interval '1 day', '28 seconds') i;\n perform brin_summarize_new_values('tbl_timestamp0_d1_idx');\n if (brin_metapage_info(get_raw_page('tbl_timestamp0_d1_idx', 0))).lastrevmappage > 1 then\n exit;\n end if;\n current := current + interval '1 day';\n end loop;\nend\n$$;" + ) + end_lsn = whiskey.lsn("flush") + result = whiskey.psql_capture( + "select count(*) from pg_get_wal_records_info('" + + str(start_lsn) + + "', '" + + str(end_lsn) + + "')\n\twhere resource_manager = 'BRIN' AND\n\trecord_type ILIKE '%revmap%'" + ) + assert int(result.stdout) >= 1 + whiskey.wait_for_catchup(charlie) diff --git a/src/test/modules/commit_ts/meson.build b/src/test/modules/commit_ts/meson.build index d8ee6ec426d1d..221a6cbf712f7 100644 --- a/src/test/modules/commit_ts/meson.build +++ 
b/src/test/modules/commit_ts/meson.build @@ -12,6 +12,14 @@ tests += { # which typical runningcheck users do not have (e.g. buildfarm clients). 'runningcheck': false, }, + 'pytest': { + 'tests': [ + 'pyt/test_004_restart.py', + 'pyt/test_001_base.py', + 'pyt/test_002_standby.py', + 'pyt/test_003_standby_2.py', + ], + }, 'tap': { 'tests': [ 't/001_base.pl', diff --git a/src/test/modules/commit_ts/pyt/test_001_base.py b/src/test/modules/commit_ts/pyt/test_001_base.py new file mode 100644 index 0000000000000..2983042d584b6 --- /dev/null +++ b/src/test/modules/commit_ts/pyt/test_001_base.py @@ -0,0 +1,31 @@ +# Copyright (c) 2022-2026, PostgreSQL Global Development Group + +# pylint: disable=line-too-long,too-many-statements +"""Port of src/test/modules/commit_ts/t/001_base.pl. + +Single-node commit-timestamp test: with track_commit_timestamp=on, a row's +commit timestamp (pg_xact_commit_timestamp of its xmin) is set close to now() +and survives an immediate-shutdown crash recovery unchanged. Generated from the +Perl original via .agent/gen_golden.py. 
+""" + + +def test_001_base(create_pg): + """Commit timestamp is set and persists across crash recovery.""" + node = create_pg("foxtrot", start=False) + node.append_conf("track_commit_timestamp=on") + node.start() + node.safe_psql("create table t as select now from (select now(), pg_sleep(1)) f") + true = node.safe_psql( + "select t.now - ts.* < '1s' from t, pg_class c, pg_xact_commit_timestamp(c.xmin) ts where relname = 't'" + ) + assert true == "t", "commit TS is set" + ts = node.safe_psql( + "select ts.* from pg_class, pg_xact_commit_timestamp(xmin) ts where relname = 't'" + ) + node.stop("immediate") + node.start() + recovered_ts = node.safe_psql( + "select ts.* from pg_class, pg_xact_commit_timestamp(xmin) ts where relname = 't'" + ) + assert recovered_ts == ts, "commit TS remains after crash recovery" diff --git a/src/test/modules/commit_ts/pyt/test_002_standby.py b/src/test/modules/commit_ts/pyt/test_002_standby.py new file mode 100644 index 0000000000000..49263172b800e --- /dev/null +++ b/src/test/modules/commit_ts/pyt/test_002_standby.py @@ -0,0 +1,53 @@ +# Copyright (c) 2022-2026, PostgreSQL Global Development Group + +# pylint: disable=line-too-long,too-many-statements +"""Port of src/test/modules/commit_ts/t/002_standby.pl. + +Commit timestamps replicate to a streaming standby: a transaction's commit timestamp queried on the standby matches the value on the primary. +Generated from the Perl original via .agent/gen_golden.py. 
+""" + +import re + + +def test_002_standby(create_pg): + """Commit timestamps replicate to a streaming standby.""" + bkplabel = "backup" + primary = create_pg("primary", allows_streaming=True, start=False) + primary.append_conf("\n\ttrack_commit_timestamp = on\n\tmax_wal_senders = 5\n\t") + primary.start() + primary.backup(bkplabel) + standby = create_pg( + "standby", from_backup=(primary, bkplabel), has_streaming=True, start=False + ) + standby.start() + for i in range(1, 11): + primary.safe_psql("create table t" + str(i) + "()") + primary_ts = primary.safe_psql( + "SELECT ts.* FROM pg_class, pg_xact_commit_timestamp(xmin) AS ts WHERE relname = 't10'" + ) + primary_lsn = primary.safe_psql("select pg_current_wal_lsn()") + assert standby.poll_query_until( + "SELECT '" + str(primary_lsn) + "'::pg_lsn <= pg_last_wal_replay_lsn()" + ), "standby never caught up" + standby_ts = standby.safe_psql( + "select ts.* from pg_class, pg_xact_commit_timestamp(xmin) ts where relname = 't10'" + ) + assert primary_ts == standby_ts, "standby gives same value as primary" + primary.append_conf("track_commit_timestamp = off") + primary.restart() + primary.safe_psql("checkpoint") + primary_lsn = primary.safe_psql("select pg_current_wal_lsn()") + assert standby.poll_query_until( + "SELECT '" + str(primary_lsn) + "'::pg_lsn <= pg_last_wal_replay_lsn()" + ), "standby never caught up" + standby.safe_psql("checkpoint") + result = standby.psql_capture( + "select ts.* from pg_class, pg_xact_commit_timestamp(xmin) ts where relname = 't10'" + ) + assert result.rc == 3, "standby errors when primary turned feature off" + assert result.stdout == "", "standby gives no value when primary turned feature off" + assert re.search( + r"""could not get commit timestamp data""", + result.stderr, + ), "expected error when primary turned feature off" diff --git a/src/test/modules/commit_ts/pyt/test_003_standby_2.py b/src/test/modules/commit_ts/pyt/test_003_standby_2.py new file mode 100644 index 
0000000000000..edb0137d03303 --- /dev/null +++ b/src/test/modules/commit_ts/pyt/test_003_standby_2.py @@ -0,0 +1,53 @@ +# Copyright (c) 2022-2026, PostgreSQL Global Development Group + +# pylint: disable=line-too-long,too-many-statements +"""Port of src/test/modules/commit_ts/t/003_standby_2.pl. + +Commit timestamps and the track_commit_timestamp setting interact correctly across a standby promotion: after promotion the standby returns valid commit timestamps. +Generated from the Perl original via .agent/gen_golden.py. +""" + +import re + + +def test_003_standby_2(create_pg): + """Commit timestamps remain valid after standby promotion.""" + bkplabel = "backup" + primary = create_pg("primary", allows_streaming=True, start=False) + primary.append_conf("\n\ttrack_commit_timestamp = on\n\tmax_wal_senders = 5\n\t") + primary.start() + primary.backup(bkplabel) + standby = create_pg( + "standby", from_backup=(primary, bkplabel), has_streaming=True, start=False + ) + standby.start() + for i in range(1, 11): + primary.safe_psql("create table t" + str(i) + "()") + primary.append_conf("track_commit_timestamp = off") + primary.restart() + primary.safe_psql("checkpoint") + primary_lsn = primary.safe_psql("select pg_current_wal_lsn()") + assert standby.poll_query_until( + "SELECT '" + str(primary_lsn) + "'::pg_lsn <= pg_last_wal_replay_lsn()" + ), "standby never caught up" + standby.safe_psql("checkpoint") + standby.restart() + result = standby.psql_capture( + "SELECT ts.* FROM pg_class, pg_xact_commit_timestamp(xmin) AS ts WHERE relname = 't10'" + ) + assert result.rc == 3, "expect error when getting commit timestamp after restart" + assert result.stdout == "", "standby does not return a value after restart" + assert re.search( + r"""could not get commit timestamp data""", + result.stderr, + ), "expected err msg after restart" + primary.append_conf("track_commit_timestamp = on") + primary.restart() + primary.append_conf("track_commit_timestamp = off") + primary.restart() + 
standby.promote() + standby.safe_psql("create table t11()") + standby_ts = standby.safe_psql( + "SELECT ts.* FROM pg_class, pg_xact_commit_timestamp(xmin) AS ts WHERE relname = 't11'" + ) + assert standby_ts != "", "standby gives valid value ($standby_ts) after promotion" diff --git a/src/test/modules/commit_ts/pyt/test_004_restart.py b/src/test/modules/commit_ts/pyt/test_004_restart.py new file mode 100644 index 0000000000000..680aca49b97f4 --- /dev/null +++ b/src/test/modules/commit_ts/pyt/test_004_restart.py @@ -0,0 +1,103 @@ +# Copyright (c) 2022-2026, PostgreSQL Global Development Group + +# pylint: disable=line-too-long,too-many-statements +"""Port of src/test/modules/commit_ts/t/004_restart.pl. + +Commit-timestamp behaviour across server restarts and track_commit_timestamp +GUC toggling, including that timestamps recorded while enabled remain readable +and that querying commit timestamps errors out when the feature is disabled. +Generated from the Perl original via .agent/gen_golden.py. 
+""" + +import re + + +def test_004_restart(create_pg): + """Generated golden port of 004_restart.""" + node_primary = create_pg("primary", allows_streaming=True, start=False) + node_primary.append_conf("track_commit_timestamp = on") + node_primary.start() + result = node_primary.psql_capture("SELECT pg_xact_commit_timestamp('0');") + assert result.rc == 3, "getting ts of InvalidTransactionId reports error" + assert re.search( + r"""cannot retrieve commit timestamp for transaction""", + result.stderr, + ), "expected error from InvalidTransactionId" + result = node_primary.psql_capture("SELECT pg_xact_commit_timestamp('1');") + assert result.rc == 0, "getting ts of BootstrapTransactionId succeeds" + assert result.stdout == "", "timestamp of BootstrapTransactionId is null" + result = node_primary.psql_capture("SELECT pg_xact_commit_timestamp('2');") + assert result.rc == 0, "getting ts of FrozenTransactionId succeeds" + assert result.stdout == "", "timestamp of FrozenTransactionId is null" + assert ( + node_primary.safe_psql("SELECT pg_xact_commit_timestamp('3');") == "" + ), "committs for FirstNormalTransactionId is null" + node_primary.safe_psql( + "CREATE TABLE committs_test(x integer, y timestamp with time zone);" + ) + xid = node_primary.safe_psql( + "BEGIN;\n\tINSERT INTO committs_test(x, y) VALUES (1, current_timestamp);\n\tSELECT pg_current_xact_id()::xid;\n\tCOMMIT;" + ) + before_restart_ts = node_primary.safe_psql( + "SELECT pg_xact_commit_timestamp('" + str(xid) + "');" + ) + assert ( + before_restart_ts != "" and before_restart_ts != "null" + ), "commit timestamp recorded" + node_primary.stop("immediate") + node_primary.start() + after_crash_ts = node_primary.safe_psql( + "SELECT pg_xact_commit_timestamp('" + str(xid) + "');" + ) + assert ( + after_crash_ts == before_restart_ts + ), "timestamps before and after crash are equal" + node_primary.stop("fast") + node_primary.start() + after_restart_ts = node_primary.safe_psql( + "SELECT 
pg_xact_commit_timestamp('" + str(xid) + "');" + ) + assert ( + after_restart_ts == before_restart_ts + ), "timestamps before and after restart are equal" + node_primary.append_conf("track_commit_timestamp = off") + node_primary.stop("fast") + node_primary.start() + node_primary.restart() + node_primary.safe_psql( + "CREATE PROCEDURE consume_xid(cnt int)\nAS $$\nDECLARE\n i int;\n BEGIN\n FOR i in 1..cnt LOOP\n EXECUTE 'SELECT pg_current_xact_id()';\n COMMIT;\n END LOOP;\n END;\n$$\nLANGUAGE plpgsql;" + ) + node_primary.safe_psql("CALL consume_xid(2000)") + result = node_primary.psql_capture( + "SELECT pg_xact_commit_timestamp('" + str(xid) + "');" + ) + assert result.rc == 3, "no commit timestamp from enable tx when cts disabled" + assert re.search( + r"""could not get commit timestamp data""", + result.stderr, + ), "expected error from enabled tx when committs disabled" + xid_disabled = node_primary.safe_psql( + "BEGIN;\n\tINSERT INTO committs_test(x, y) VALUES (2, current_timestamp);\n\tSELECT pg_current_xact_id();\n\tCOMMIT;" + ) + result = node_primary.psql_capture( + "SELECT pg_xact_commit_timestamp('" + str(xid_disabled) + "');" + ) + assert result.rc == 3, "no commit timestamp when disabled" + assert re.search( + r"""could not get commit timestamp data""", + result.stderr, + ), "expected error from disabled tx when committs disabled" + node_primary.append_conf("track_commit_timestamp = on") + node_primary.stop("immediate") + node_primary.start() + after_enable_ts = node_primary.safe_psql( + "SELECT pg_xact_commit_timestamp('" + str(xid) + "');" + ) + assert after_enable_ts == "", "timestamp of enabled tx null after re-enable" + after_enable_disabled_ts = node_primary.safe_psql( + "SELECT pg_xact_commit_timestamp('" + str(xid_disabled) + "');" + ) + assert ( + after_enable_disabled_ts == "" + ), "timestamp of disabled tx null after re-enable" + node_primary.stop() diff --git a/src/test/modules/ldap_password_func/meson.build 
b/src/test/modules/ldap_password_func/meson.build index 209b66833734f..8a29ef2c0f7c2 100644 --- a/src/test/modules/ldap_password_func/meson.build +++ b/src/test/modules/ldap_password_func/meson.build @@ -30,4 +30,10 @@ tests += { ], 'env': {'with_ldap': 'yes'} }, + 'pytest': { + 'tests': [ + 'pyt/test_001_mutated_bindpasswd.py', + ], + 'env': {'with_ldap': 'yes'} + }, } diff --git a/src/test/modules/ldap_password_func/pyt/conftest.py b/src/test/modules/ldap_password_func/pyt/conftest.py new file mode 100644 index 0000000000000..cd452d44c5134 --- /dev/null +++ b/src/test/modules/ldap_password_func/pyt/conftest.py @@ -0,0 +1,14 @@ +# Copyright (c) 2022-2026, PostgreSQL Global Development Group + +"""Shared fixtures for the ldap_password_func pytest suite. + +Reuses the shared LDAP server infrastructure from ``pypg.ldapserver`` (the +Python twin of ``use lib ".../ldap"; use LdapServer;`` in the Perl original) by +re-exporting its ``ldap_server`` fixture. Because the helper now lives in the +shared pypg package, no sys.path manipulation is needed. +""" + +# pylint: disable=unused-import +from pypg.ldapserver import ( # noqa: F401 + ldap_server_fixture as ldap_server, +) diff --git a/src/test/modules/ldap_password_func/pyt/test_001_mutated_bindpasswd.py b/src/test/modules/ldap_password_func/pyt/test_001_mutated_bindpasswd.py new file mode 100644 index 0000000000000..ed4509fc846af --- /dev/null +++ b/src/test/modules/ldap_password_func/pyt/test_001_mutated_bindpasswd.py @@ -0,0 +1,105 @@ +# Copyright (c) 2022-2026, PostgreSQL Global Development Group + +"""Port of src/test/modules/ldap_password_func/t/001_mutated_bindpasswd.pl. + +Verifies the ldap_password_func contrib module, which mutates the configured +ldapbindpasswd by rot13. With the module preloaded: a wrong bindpasswd fails, +the clear-text password fails (because the hook rot13's it before the bind), +and the rot13'd password succeeds (the hook rot13's it back to clear text). 
+Reuses the LDAP server infrastructure from src/test/ldap/pyt. +""" + +import os + + +def _test_access(node, role, expected_res, test_name): + """Connect as role; assert success (0) or failure, mirroring test_access.""" + connstr = "user={}".format(role) + if expected_res == 0: + node.connect_ok(connstr, test_name) + else: + # No checks of the error message, only the status code. + node.connect_fails(connstr, test_name) + + +def _write_hba(node, line): + """Replace pg_hba.conf with a single line (mirrors unlink + append_conf).""" + hba = node.datadir / "pg_hba.conf" + hba.unlink() + node.append_conf(line, filename="pg_hba.conf") + + +def test_001_mutated_bindpasswd(create_pg, ldap_server): + """ldap_password_func rot13's ldapbindpasswd: only the rot13'd value works.""" + clear_ldap_rootpw = "FooBaR1" + rot13_ldap_rootpw = "SbbOnE1" + + ldap = ldap_server(clear_ldap_rootpw, "users") # no anonymous auth + authdata = os.path.normpath( + os.path.join( + os.path.dirname(__file__), "..", "..", "..", "ldap", "authdata.ldif" + ) + ) + ldap.ldapadd_file(authdata) + ldap.ldapsetpw("uid=test1,dc=example,dc=net", "secret1") + + ldap_server_host, ldap_port, ldap_basedn, ldap_rootdn = ldap.prop( + "server", "port", "basedn", "rootdn" + ) + + node = create_pg("node", start=False) + node.append_conf("log_connections = 'receipt,authentication,authorization'\n") + node.append_conf("shared_preload_libraries = 'ldap_password_func'") + node.start() + + node.safe_psql("CREATE USER test1;") + + old_pgpassword = os.environ.get("PGPASSWORD") + os.environ["PGPASSWORD"] = "secret1" + try: + _write_hba( + node, + "local all all ldap ldapserver={} ldapport={} " + 'ldapbasedn="{}" ldapbinddn="{}" ldapbindpasswd=wrong'.format( + ldap_server_host, ldap_port, ldap_basedn, ldap_rootdn + ), + ) + node.restart() + _test_access( + node, + "test1", + 2, + "search+bind authentication fails with wrong ldapbindpasswd", + ) + + _write_hba( + node, + "local all all ldap ldapserver={} ldapport={} " + 
'ldapbasedn="{}" ldapbinddn="{}" ldapbindpasswd="{}"'.format( + ldap_server_host, ldap_port, ldap_basedn, ldap_rootdn, clear_ldap_rootpw + ), + ) + node.restart() + _test_access( + node, "test1", 2, "search+bind authentication fails with clear password" + ) + + _write_hba( + node, + "local all all ldap ldapserver={} ldapport={} " + 'ldapbasedn="{}" ldapbinddn="{}" ldapbindpasswd="{}"'.format( + ldap_server_host, ldap_port, ldap_basedn, ldap_rootdn, rot13_ldap_rootpw + ), + ) + node.restart() + _test_access( + node, + "test1", + 0, + "search+bind authentication succeeds with rot13ed password", + ) + finally: + if old_pgpassword is None: + os.environ.pop("PGPASSWORD", None) + else: + os.environ["PGPASSWORD"] = old_pgpassword diff --git a/src/test/modules/libpq_pipeline/meson.build b/src/test/modules/libpq_pipeline/meson.build index 5bb895d8548fa..a23a011745347 100644 --- a/src/test/modules/libpq_pipeline/meson.build +++ b/src/test/modules/libpq_pipeline/meson.build @@ -29,4 +29,10 @@ tests += { ], 'deps': [libpq_pipeline], }, + 'pytest': { + 'tests': [ + 'pyt/test_001_libpq_pipeline.py', + ], + 'deps': [libpq_pipeline], + }, } diff --git a/src/test/modules/libpq_pipeline/pyt/test_001_libpq_pipeline.py b/src/test/modules/libpq_pipeline/pyt/test_001_libpq_pipeline.py new file mode 100644 index 0000000000000..f528b801790da --- /dev/null +++ b/src/test/modules/libpq_pipeline/pyt/test_001_libpq_pipeline.py @@ -0,0 +1,65 @@ +# Copyright (c) 2021-2026, PostgreSQL Global Development Group + +"""Port of src/test/modules/libpq_pipeline/t/001_libpq_pipeline.pl. + +Runs every sub-test reported by the libpq_pipeline C program against a live +server using the latest protocol version, and for the trace-producing tests +compares the emitted libpq trace against the expected trace checked into the +source tree. Also exercises query cancellation over protocol 3.0. 
+""" + +import os + +import pypg + +_NUMROWS = 700 +_CMPTRACE = { + "simple_pipeline", + "nosync", + "multi_pipelines", + "prepared", + "singlerow", + "pipeline_abort", + "pipeline_idle", + "transaction", + "disallowed_in_pipeline", +} +_TRACES_DIR = os.path.join(os.path.dirname(__file__), "..", "traces") + + +def test_001_libpq_pipeline(create_pg, pg_bin, tmp_check): + """Each libpq_pipeline sub-test passes; trace tests match expected traces.""" + node = create_pg("main") + result = pg_bin.run_command(["libpq_pipeline", "tests"]) + assert result.stderr == "", "oops: {}".format(result.stderr) + tests = result.stdout.split() + out_traces = tmp_check / "traces" + out_traces.mkdir(exist_ok=True) + connstr = node.connstr("postgres") + for testname in tests: + extraargs = ["-r", str(_NUMROWS)] + cmptrace = testname in _CMPTRACE + traceout = out_traces / "{}.trace".format(testname) + if cmptrace: + extraargs += ["-t", str(traceout)] + node.command_ok( + ["libpq_pipeline"] + + extraargs + + [testname, connstr + " max_protocol_version=latest"], + "libpq_pipeline {}".format(testname), + ) + if cmptrace: + expected = pypg.slurp_file( + os.path.join(_TRACES_DIR, "{}.trace".format(testname)) + ) + if expected == "": + continue + actual = pypg.slurp_file(str(traceout)) + if actual == "": + continue + assert actual == expected, "{} trace match".format(testname) + node.command_ok( + ["libpq_pipeline", "cancel", connstr + " max_protocol_version=3.0"], + "libpq_pipeline cancel with protocol 3.0", + ) + node.stop("fast") diff --git a/src/test/modules/oauth_validator/meson.build b/src/test/modules/oauth_validator/meson.build index 506a9894b8d3c..24fbf9818633a 100644 --- a/src/test/modules/oauth_validator/meson.build +++ b/src/test/modules/oauth_validator/meson.build @@ -84,4 +84,17 @@ tests += { }, 'deps': [oauth_hook_client], }, + 'pytest': { + 'tests': [ + 'pyt/test_001_server.py', + 'pyt/test_002_client.py', + ], + 'env': { + 'PYTHON': python.full_path(), + 'with_libcurl': 
oauth_flow_supported ? 'yes' : 'no', + 'with_python': 'yes', + 'cert_dir': meson.project_source_root() / 'src/test/ssl/ssl', + }, + 'deps': [oauth_hook_client], + }, } diff --git a/src/test/modules/oauth_validator/pyt/conftest.py b/src/test/modules/oauth_validator/pyt/conftest.py new file mode 100644 index 0000000000000..8b91395ae6cdb --- /dev/null +++ b/src/test/modules/oauth_validator/pyt/conftest.py @@ -0,0 +1,24 @@ +# Copyright (c) 2025-2026, PostgreSQL Global Development Group + +"""Shared fixtures for the oauth_validator pytest suite. + +Exposes the mock OAuth authorization server (see pypg.oauthserver) as a +module-scoped ``webserver`` fixture, mirroring the OAuth::Server usage in the +Perl tests. The harness now lives in the shared pypg package, so it is a plain +import. +""" + +import pytest + +from pypg.oauthserver import OAuthServer + + +@pytest.fixture(scope="module") +def webserver(): + """Run the mock OAuth authorization server for the duration of a module.""" + server = OAuthServer() + server.run() + try: + yield server + finally: + server.stop() diff --git a/src/test/modules/oauth_validator/pyt/test_001_server.py b/src/test/modules/oauth_validator/pyt/test_001_server.py new file mode 100644 index 0000000000000..dfd87c4786898 --- /dev/null +++ b/src/test/modules/oauth_validator/pyt/test_001_server.py @@ -0,0 +1,875 @@ +# Copyright (c) 2025-2026, PostgreSQL Global Development Group + +# pylint: disable=line-too-long +"""Port of src/test/modules/oauth_validator/t/001_server.pl. + +Tests the libpq builtin OAuth flow, along with server-side HBA and validator +setup, against a mock OAuth issuer (pyt/oauth_server.py, launched via the +OAuthServer harness). Gated behind PG_TEST_EXTRA=oauth; additionally requires a +platform with epoll/kqueue, a libcurl-enabled build (with_libcurl=yes) and +Python support (with_python=yes), exactly as the Perl original. 
+ +The test runs as a single ordered function because, like the Perl script, it +mutates shared server state (pg_hba.conf, pg_ident.conf, ALTER SYSTEM settings, +oauth_validator_libraries) sequentially and each step depends on the previous +one. +""" + +import base64 +import json +import os +import re + +import pytest + +import pypg + +pytestmark = pypg.require_test_extras("oauth") + +# Every allowed character for a client_id/secret (the OAuth "VSCHAR" class). +# Unlike Perl we do not need a separately backslash-escaped variant: connection +# options are passed as discrete strings, not embedded in a quoted connstr. +_VSCHARS = ( + " !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`" + "abcdefghijklmnopqrstuvwxyz{|}~" +) + +_VISIT = r"Visit https://example\.com/ and enter the code: postgresuser" +_VISIT_ORG = r"Visit https://example\.org/ and enter the code: postgresuser" + + +@pytest.fixture(scope="module", autouse=True) +def _require_platform(): + """Skip on platforms or builds without OAuth server-side support.""" + if not ( + pypg.check_pg_config(r"#define HAVE_SYS_EVENT_H 1") + or pypg.check_pg_config(r"#define HAVE_SYS_EPOLL_H 1") + ): + pytest.skip("OAuth server-side tests are not supported on this platform") + if os.environ.get("with_libcurl") != "yes": + pytest.skip("client-side OAuth not supported by this build") + if os.environ.get("with_python") != "yes": + pytest.skip("OAuth tests require --with-python to run") + + +def _conninfo_quote(value): + """Quote a value for inclusion in a libpq connection string. + + Mirrors libpq's keyword=value parsing: wrap in single quotes and escape + embedded backslashes and single quotes, so the full VSCHAR set survives. + """ + escaped = value.replace("\\", "\\\\").replace("'", "\\'") + return "'{}'".format(escaped) + + +def _encode_connstr(base_connstr, **params): + """Return base_connstr with a Base64(JSON) params blob as oauth_client_id. 
+ + Mirrors the connstr() helper in 001_server.pl: the mock server's /param + issuer decodes the test instructions out of the client_id field. + """ + js = json.dumps(params, separators=(",", ":")) + encoded = base64.b64encode(js.encode("utf-8")).decode("ascii") + return "{} oauth_client_id={}".format(base_connstr, encoded) + + +def _wait_reload(node, offset): + """Reload and wait for the config-reload log line, returning the new offset.""" + node.reload() + return node.wait_for_log(r"reloading configuration files", offset) + + +def _setup_node(node): + """Initialize the server exactly as the top of 001_server.pl does.""" + node.append_conf("log_connections = all\n") + node.append_conf("oauth_validator_libraries = 'validator'\n") + node.append_conf("log_min_messages = debug2") + node.start() + node.safe_psql("CREATE USER test;") + node.safe_psql("CREATE USER testalt;") + node.safe_psql("CREATE USER testparam;") + + +def _phase_http_rejected(node, issuer): + """HTTP and untrusted HTTPS are refused without a real debug marker.""" + node.connect_fails( + "user=test dbname=postgres oauth_issuer={} oauth_client_id=f02c6361-0635".format( + issuer + ), + "HTTPS is required without debug mode", + expected_stderr=( + r'OAuth discovery URI "' + + re.escape(issuer + "/.well-known/openid-configuration") + + r'" must use HTTPS' + ), + ) + + # PGOAUTHDEBUG=http should have no effect (it needs an UNSAFE: marker). 
+ saved = os.environ.get("PGOAUTHDEBUG") + os.environ["PGOAUTHDEBUG"] = "http" + try: + node.connect_fails( + "user=test dbname=postgres oauth_issuer={} oauth_client_id=f02c6361-0635".format( + issuer + ), + "HTTPS is required without debug mode (bad PGOAUTHDEBUG value)", + expected_stderr=( + r'(?ms)^WARNING: .* option "http" is unsafe.*' + r'OAuth discovery URI "' + + re.escape(issuer + "/.well-known/openid-configuration") + + r'" must use HTTPS' + ), + ) + finally: + if saved is None: + os.environ.pop("PGOAUTHDEBUG", None) + else: + os.environ["PGOAUTHDEBUG"] = saved + + +def _phase_https_hba(node, issuer, bgconn, offset): + """Switch HBA to HTTPS issuers and verify pg_hba_file_rules() reflects it.""" + (node.datadir / "pg_hba.conf").unlink() + node.append_conf( + "\n" + 'local all test oauth issuer="{0}" scope="openid postgres"\n' + 'local all testalt oauth issuer="{0}/.well-known/oauth-authorization-server/alternate" scope="openid postgres alt"\n' + 'local all testparam oauth issuer="{0}/param" scope="openid postgres"\n'.format( + issuer + ), + filename="pg_hba.conf", + ) + offset = _wait_reload(node, offset) + + contents = bgconn.query_safe( + "SELECT rule_number, auth_method, options\n" + " FROM pg_hba_file_rules\n" + " ORDER BY rule_number;" + ) + expected = ( + '1|oauth|{{issuer={0},"scope=openid postgres",validator=validator}}\n' + '2|oauth|{{issuer={0}/.well-known/oauth-authorization-server/alternate,"scope=openid postgres alt",validator=validator}}\n' + '3|oauth|{{issuer={0}/param,"scope=openid postgres",validator=validator}}'.format( + issuer + ) + ) + assert contents == expected, "pg_hba_file_rules recreates OAuth HBA settings" + return offset + + +def _phase_ca_handling(node, issuer, alternative_ca): + """Certificate verification: UNSAFE keeps cert checks; oauth_ca_file works.""" + saved = os.environ.get("PGOAUTHDEBUG") + os.environ["PGOAUTHDEBUG"] = "UNSAFE" + try: + node.connect_fails( + "user=test dbname=postgres oauth_issuer={} 
oauth_client_id=f02c6361-0635".format( + issuer + ), + "HTTPS trusts only system CA roots by default", + expected_stderr=r"(?i)could not fetch OpenID discovery document:.*peer certificate", + ) + finally: + if saved is None: + os.environ.pop("PGOAUTHDEBUG", None) + else: + os.environ["PGOAUTHDEBUG"] = saved + + node.connect_ok( + "user=test dbname=postgres oauth_issuer={} oauth_client_id=f02c6361-0635 oauth_ca_file={}".format( + issuer, alternative_ca + ), + "connect as test (oauth_ca_file)", + expected_stderr=_VISIT, + log_like=[ + r'oauth_validator: token="9243959234", role="test"', + r'oauth_validator: issuer="' + + re.escape(issuer) + + r'", scope="openid postgres"', + r'connection authenticated: identity="test" method=oauth', + r"connection authorized", + ], + ) + + # From here on, rely on PGOAUTHCAFILE in the environment. + os.environ["PGOAUTHCAFILE"] = alternative_ca + node.connect_ok( + "user=test dbname=postgres oauth_issuer={} oauth_client_id=f02c6361-0635".format( + issuer + ), + "connect as test", + expected_stderr=_VISIT, + log_like=[ + r'oauth_validator: token="9243959234", role="test"', + r'oauth_validator: issuer="' + + re.escape(issuer) + + r'", scope="openid postgres"', + r'connection authenticated: identity="test" method=oauth', + r"connection authorized", + ], + log_unlike=[r"FATAL.*OAuth bearer authentication failed"], + ) + + +def _phase_alternate_and_require_auth(node, issuer): + """The /alternate issuer, issuer mismatch, and require_auth matrix.""" + # Enable extra debugging features for the remaining tests: + # trace, dos-endpoint (faster handshake), and call-count. 
+ os.environ["PGOAUTHDEBUG"] = "UNSAFE:trace,dos-endpoint,call-count" + + node.connect_ok( + "user=testalt dbname=postgres oauth_issuer={}/alternate oauth_client_id=f02c6361-0636".format( + issuer + ), + "connect as testalt", + expected_stderr=_VISIT_ORG, + log_like=[ + r'oauth_validator: token="9243959234-alt", role="testalt"', + r'oauth_validator: issuer="' + + re.escape(issuer + "/.well-known/oauth-authorization-server/alternate") + + r'", scope="openid postgres alt"', + r'connection authenticated: identity="testalt" method=oauth', + r"connection authorized", + ], + log_unlike=[r"FATAL.*OAuth bearer authentication failed"], + ) + + node.connect_fails( + "user=testalt dbname=postgres oauth_issuer={} oauth_client_id=f02c6361-0636".format( + issuer + ), + "oauth_issuer must match discovery", + expected_stderr=( + r"server's discovery document at " + + re.escape(issuer + "/.well-known/oauth-authorization-server/alternate") + + r' \(issuer "' + + re.escape(issuer + "/alternate") + + r'"\) is incompatible with oauth_issuer \(' + + re.escape(issuer) + + r"\)" + ), + ) + + ok_cases = [ + "oauth", + "oauth,scram-sha-256", + "password,oauth", + "none,oauth", + "!scram-sha-256", + "!none", + ] + fail_cases = [ + ("!oauth", r"server requested OAUTHBEARER authentication"), + ("scram-sha-256", r"server requested OAUTHBEARER authentication"), + ("!password,!oauth", r"server requested OAUTHBEARER authentication"), + ("none", r"server requested SASL authentication"), + ("!oauth,!scram-sha-256", r"server requested SASL authentication"), + ] + base = "user=test dbname=postgres oauth_issuer={} oauth_client_id=f02c6361-0635".format( + issuer + ) + for require in ok_cases: + node.connect_ok( + "{} require_auth={}".format(base, require), + "require_auth={} succeeds".format(require), + expected_stderr=_VISIT, + ) + for require, failure in fail_cases: + node.connect_fails( + "{} require_auth={}".format(base, require), + "require_auth={} fails".format(require), + expected_stderr=failure, 
+ ) + + +def _phase_vschars(node, issuer): + """The client_id/secret VSCHAR set is transmitted and encoded correctly.""" + node.connect_ok( + "user=test dbname=postgres oauth_issuer={} oauth_client_id={}".format( + issuer, _conninfo_quote(_VSCHARS) + ), + "escapable characters: client_id", + expected_stderr=_VISIT, + ) + node.connect_ok( + "user=test dbname=postgres oauth_issuer={} oauth_client_id={} oauth_client_secret={}".format( + issuer, _conninfo_quote(_VSCHARS), _conninfo_quote(_VSCHARS) + ), + "escapable characters: client_id and secret", + expected_stderr=_VISIT, + ) + + +def _phase_param_basics(node, common): + """The /param magic system works end-to-end, including token retries.""" + node.connect_ok( + _encode_connstr(common), "connect to /param", expected_stderr=_VISIT + ) + node.connect_ok( + _encode_connstr(common, stage="token", retries=1), + "token retry", + expected_stderr=_VISIT, + ) + node.connect_ok( + _encode_connstr(common, stage="token", retries=2), + "token retry (twice)", + expected_stderr=_VISIT, + ) + node.connect_ok( + _encode_connstr(common, stage="all", retries=1, interval=2), + "token retry (two second interval)", + expected_stderr=_VISIT, + ) + node.connect_ok( + _encode_connstr(common, stage="all", retries=1, interval=None), + "token retry (default interval)", + expected_stderr=_VISIT, + ) + + +def _phase_param_content_type(node, common): + """Content-type handling and the alternative verification_uri spelling.""" + node.connect_ok( + _encode_connstr( + common, stage="all", content_type="application/json;charset=utf-8" + ), + "content type with charset", + expected_stderr=_VISIT, + ) + node.connect_ok( + _encode_connstr( + common, stage="all", content_type="application/json \t;\t charset=utf-8" + ), + "content type with charset (whitespace)", + expected_stderr=_VISIT, + ) + node.connect_ok( + _encode_connstr(common, stage="device", uri_spelling="verification_url"), + "alternative spelling of verification_uri", + 
expected_stderr=_VISIT, + ) + + +def _phase_param_bad_responses(node, common): + """Overlarge, over-nested, and wrong-content-type responses are rejected.""" + node.connect_fails( + _encode_connstr(common, stage="device", huge_response=True), + "bad device authz response: overlarge JSON", + expected_stderr=r"could not obtain device authorization: response is too large", + ) + node.connect_fails( + _encode_connstr(common, stage="token", huge_response=True), + "bad token response: overlarge JSON", + expected_stderr=r"could not obtain access token: response is too large", + ) + + nesting_limit = 16 + node.connect_ok( + _encode_connstr( + common, + stage="device", + nested_array=nesting_limit, + nested_object=nesting_limit, + ), + "nested arrays and objects, up to parse limit", + expected_stderr=_VISIT, + ) + node.connect_fails( + _encode_connstr(common, stage="device", nested_array=nesting_limit + 1), + "bad discovery response: overly nested JSON array", + expected_stderr=r"could not parse device authorization: JSON is too deeply nested", + ) + node.connect_fails( + _encode_connstr(common, stage="device", nested_object=nesting_limit + 1), + "bad discovery response: overly nested JSON object", + expected_stderr=r"could not parse device authorization: JSON is too deeply nested", + ) + + node.connect_fails( + _encode_connstr(common, stage="device", content_type="text/plain"), + "bad device authz response: wrong content type", + expected_stderr=r"could not parse device authorization: unexpected content type", + ) + node.connect_fails( + _encode_connstr(common, stage="token", content_type="text/plain"), + "bad token response: wrong content type", + expected_stderr=r"could not parse access token response: unexpected content type", + ) + node.connect_fails( + _encode_connstr(common, stage="token", content_type="application/jsonx"), + "bad token response: wrong content type (correct prefix)", + expected_stderr=r"could not parse access token response: unexpected content type", 
+ ) + + +def _phase_param_token_errors(node, common): + """Interval overflow and the various OAuth token error responses.""" + node.connect_fails( + _encode_connstr( + common, stage="all", interval=(2**64) - 1, retries=1, retry_code="slow_down" + ), + "bad token response: server overflows the device authz interval", + expected_stderr=r"could not obtain access token: slow_down interval overflow", + ) + + node.connect_fails( + _encode_connstr(common, stage="token", error_code="invalid_grant"), + "bad token response: invalid_grant, no description", + expected_stderr=r"could not obtain access token: \(invalid_grant\)", + ) + node.connect_fails( + _encode_connstr( + common, + stage="token", + error_code="invalid_grant", + error_desc="grant expired", + ), + "bad token response: expired grant", + expected_stderr=r"could not obtain access token: grant expired \(invalid_grant\)", + ) + node.connect_fails( + _encode_connstr( + common, stage="token", error_code="invalid_client", error_status=401 + ), + "bad token response: client authentication failure, default description", + expected_stderr=r"could not obtain access token: provider requires client authentication, and no oauth_client_secret is set \(invalid_client\)", + ) + node.connect_fails( + _encode_connstr( + common, + stage="token", + error_code="invalid_client", + error_status=401, + error_desc="authn failure", + ), + "bad token response: client authentication failure, provided description", + expected_stderr=r"could not obtain access token: authn failure \(invalid_client\)", + ) + + node.connect_fails( + _encode_connstr(common, stage="token", token=""), + "server rejects access: empty token", + expected_stderr=r"bearer authentication failed", + ) + node.connect_fails( + _encode_connstr(common, stage="token", token="****"), + "server rejects access: invalid token contents", + expected_stderr=r"bearer authentication failed", + ) + + +def _phase_client_secret(node, common): + """oauth_client_secret is forwarded and 
reflected in error descriptions.""" + base = "{} oauth_client_secret=''".format(common) + node.connect_ok( + _encode_connstr(base, stage="all", expected_secret=""), + "empty oauth_client_secret", + expected_stderr=_VISIT, + ) + + base = "{} oauth_client_secret={}".format(common, _conninfo_quote(_VSCHARS)) + node.connect_ok( + _encode_connstr(base, stage="all", expected_secret=_VSCHARS), + "nonempty oauth_client_secret", + expected_stderr=_VISIT, + ) + + node.connect_fails( + _encode_connstr( + base, stage="token", error_code="invalid_client", error_status=401 + ), + "bad token response: client authentication failure, default description with oauth_client_secret", + expected_stderr=r"could not obtain access token: provider rejected the oauth_client_secret \(invalid_client\)", + ) + node.connect_fails( + _encode_connstr( + base, + stage="token", + error_code="invalid_client", + error_status=401, + error_desc="mutual TLS required for client", + ), + "bad token response: client authentication failure, provided description with oauth_client_secret", + expected_stderr=r"could not obtain access token: mutual TLS required for client \(invalid_client\)", + ) + + +def _phase_call_count(node, common): + """A retrying flow must not loop excessively (sanity-bound the poll count).""" + result = node.psql_capture( + "SELECT 'connected for call count'", + extra_params=["-w"], + connstr=_encode_connstr(common, stage="token", retries=2), + on_error_stop=False, + ) + assert result.rc == 0, "call count connection succeeds\n{}".format(result.stderr) + assert re.search(_VISIT, result.stderr), "call count: stderr matches" + + match = re.search(r"\[libpq\] total number of polls: (\d+)", result.stderr) + assert match is not None, "call count: count is printed" + assert int(match.group(1)) < 100, "call count is reasonably small" + + +def _phase_stress_async(node, common): + """The builtin flow must work even if the app ignores polling signals.""" + base = "{} port={} host={}".format(common, 
node.port, node.host) + result = node.bin.run_command( + [ + "oauth_hook_client", + "--no-hook", + "--stress-async", + _encode_connstr(base, stage="all", retries=1, interval=1), + ] + ) + assert re.search( + r"connection succeeded", result.stdout + ), "stress-async: stdout matches" + assert not re.search( + r"connection to database failed", result.stderr + ), "stress-async: stderr matches" + + +def _phase_validator_failshut(node, bgconn, common, offset): + """A misbehaving validator must fail shut (no identity / not authorized).""" + bgconn.query_safe("ALTER SYSTEM SET oauth_validator.authn_id TO ''") + offset = _wait_reload(node, offset) + node.connect_fails( + "{} user=test".format(common), + "validator must set authn_id", + expected_stderr=r"OAuth bearer authentication failed", + log_like=[ + r'connection authenticated: identity=""', + r"FATAL:\s+OAuth bearer authentication failed", + r"DETAIL:\s+Validator provided no identity", + ], + ) + + bgconn.query_safe("ALTER SYSTEM SET oauth_validator.authn_id TO 'test@example.org'") + bgconn.query_safe("ALTER SYSTEM SET oauth_validator.authorize_tokens TO false") + offset = _wait_reload(node, offset) + node.connect_fails( + "{} user=test".format(common), + "validator must authorize token explicitly", + expected_stderr=r"OAuth bearer authentication failed", + log_like=[ + r'connection authenticated: identity="test@example\.org"', + r"FATAL:\s+OAuth bearer authentication failed", + r"DETAIL:\s+Validator failed to authorize the provided token", + ], + ) + + bgconn.query_safe( + "ALTER SYSTEM SET oauth_validator.error_detail TO 'something failed'" + ) + offset = _wait_reload(node, offset) + node.connect_fails( + "{} user=test".format(common), + "validator must authorize token explicitly (custom logdetail)", + expected_stderr=r"OAuth bearer authentication failed", + log_like=[ + r'connection authenticated: identity="test@example\.org"', + r"FATAL:\s+OAuth bearer authentication failed", + r"DETAIL:\s+something failed", + ], + 
) + + bgconn.query_safe("ALTER SYSTEM SET oauth_validator.internal_error TO true") + offset = _wait_reload(node, offset) + node.connect_fails( + "{} user=test".format(common), + "validator internal error (custom logdetail)", + expected_stderr=r"OAuth bearer authentication failed", + log_like=[ + r"WARNING:\s+internal error in OAuth validator module", + r"DETAIL:\s+something failed", + ], + ) + + bgconn.query_safe("ALTER SYSTEM RESET oauth_validator.error_detail") + bgconn.query_safe("ALTER SYSTEM RESET oauth_validator.internal_error") + return offset + + +def _phase_bad_hba_option(node, bgconn, common, offset): + """Registering a bad HBA option warns but lets connections proceed.""" + bgconn.query_safe("ALTER SYSTEM RESET oauth_validator.authn_id") + bgconn.query_safe("ALTER SYSTEM RESET oauth_validator.authorize_tokens") + bgconn.query_safe("ALTER SYSTEM SET oauth_validator.invalid_hba TO true") + offset = _wait_reload(node, offset) + node.connect_ok( + "{} user=test".format(common), + "bad registered HBA option", + expected_stderr=_VISIT, + log_like=[ + r'WARNING:\s+HBA option name "bad option name" is invalid and will be ignored', + r'CONTEXT:\s+validator module "validator", in call to RegisterOAuthHBAOptions', + ], + ) + bgconn.query_safe("ALTER SYSTEM RESET oauth_validator.invalid_hba") + return offset + + +def _phase_user_mapping(node, bgconn, common, issuer, offset): + """User-mapping vs. 
ident delegation under an OAuth HBA.""" + (node.datadir / "pg_ident.conf").unlink() + node.append_conf("\noauthmap\tuser@example.com\ttest\n", filename="pg_ident.conf") + (node.datadir / "pg_hba.conf").unlink() + node.append_conf( + "\n" + 'local all test oauth issuer="{0}" scope="" map=oauthmap\n' + 'local all testalt oauth issuer="{0}" scope="" map=oauthmap\n' + 'local all testparam oauth issuer="{0}" scope="" delegate_ident_mapping=1\n'.format( + issuer + ), + filename="pg_hba.conf", + ) + bgconn.query_safe("ALTER SYSTEM RESET oauth_validator.authn_id") + bgconn.query_safe("ALTER SYSTEM RESET oauth_validator.authorize_tokens") + offset = _wait_reload(node, offset) + + node.connect_fails( + "{} user=test".format(common), + "mismatched username map (test)", + expected_stderr=r"OAuth bearer authentication failed", + ) + node.connect_fails( + "{} user=testalt".format(common), + "mismatched username map (testalt)", + expected_stderr=r"OAuth bearer authentication failed", + ) + + bgconn.query_safe("ALTER SYSTEM SET oauth_validator.authn_id TO 'user@example.com'") + offset = _wait_reload(node, offset) + + node.connect_ok( + "{} user=test".format(common), + "matched username map (test)", + expected_stderr=_VISIT, + ) + node.connect_fails( + "{} user=testalt".format(common), + "mismatched username map (testalt)", + expected_stderr=r"OAuth bearer authentication failed", + ) + node.connect_ok( + "{} user=testparam".format(common), + "delegated ident (testparam)", + expected_stderr=_VISIT, + ) + + bgconn.query_safe("ALTER SYSTEM RESET oauth_validator.authn_id") + offset = _wait_reload(node, offset) + return offset + + +def _phase_validator_hba_options(node, common, issuer, offset): + """Validator-specific HBA options, including bad-syntax restart failures.""" + (node.datadir / "pg_hba.conf").unlink() + node.append_conf( + "\n" + 'local all test oauth issuer="{0}" scope="openid postgres" delegate_ident_mapping=1 \\\n' + ' validator.authn_id="ignored" 
validator.authn_id="other-identity"\n' + 'local all testalt oauth issuer="{0}" scope="openid postgres" validator.log="testalt message"\n'.format( + issuer + ), + filename="pg_hba.conf", + ) + offset = _wait_reload(node, offset) + + node.connect_ok( + "{} user=test".format(common), + "custom HBA setting (test)", + expected_stderr=_VISIT, + log_like=[r'connection authenticated: identity="other-identity"'], + ) + node.connect_ok( + "{} user=testalt".format(common), + "custom HBA setting (testalt)", + expected_stderr=_VISIT, + log_like=[ + r"LOG:\s+testalt message", + r'connection authenticated: identity="testalt"', + ], + ) + + # Bad syntax: empty option name. + (node.datadir / "pg_hba.conf").unlink() + node.append_conf( + '\nlocal all testalt oauth issuer="{0}" scope="openid postgres" validator.=1\n'.format( + issuer + ), + filename="pg_hba.conf", + ) + log_start = node.current_log_position() + assert ( + node.restart( + fail_ok=True, + log_like=[r'invalid OAuth validator option name: "validator\."'], + ) + is False + ), "empty HBA option name" + + # Bad syntax: invalid characters in option name. + (node.datadir / "pg_hba.conf").unlink() + node.append_conf( + '\nlocal all testalt oauth issuer="{0}" scope="openid postgres" validator.@@=1\n'.format( + issuer + ), + filename="pg_hba.conf", + ) + node.current_log_position() + assert ( + node.restart( + fail_ok=True, + log_like=[r'invalid OAuth validator option name: "validator\.@@"'], + ) + is False + ), "invalid HBA option name" + + # Unknown settings: validation deferred to connect time. 
+ (node.datadir / "pg_hba.conf").unlink() + node.append_conf( + "\n" + 'local all testalt oauth issuer="{0}" scope="openid postgres" \\\n' + " validator.log=ignored validator.bad=1\n".format( + issuer + ), + filename="pg_hba.conf", + ) + node.restart() + node.connect_fails( + "{} user=testalt".format(common), + "bad HBA setting", + expected_stderr=r"OAuth bearer authentication failed", + log_like=[ + r'WARNING:\s+unrecognized authentication option name: "validator\.bad"', + r"FATAL:\s+OAuth bearer authentication failed", + r'DETAIL:\s+unrecognized authentication option name: "validator\.bad"', + ], + ) + _ = log_start + return node.current_log_position() + + +def _phase_multiple_validators(node, issuer, offset): + """With multiple validators each HBA line must name one explicitly.""" + node.append_conf("oauth_validator_libraries = 'validator, fail_validator'\n") + assert ( + node.restart(fail_ok=True) is False + ), "restart fails without explicit validators in oauth HBA entries" + offset = node.wait_for_log( + r'authentication method "oauth" requires option "validator" to be set', offset + ) + + (node.datadir / "pg_hba.conf").unlink() + node.append_conf( + "\n" + 'local all test oauth validator=validator issuer="{0}" scope="openid postgres"\n' + 'local all testalt oauth validator=fail_validator issuer="{0}/.well-known/oauth-authorization-server/alternate" scope="openid postgres alt"\n'.format( + issuer + ), + filename="pg_hba.conf", + ) + node.restart() + offset = node.wait_for_log(r"ready to accept connections", offset) + + node.connect_ok( + "user=test dbname=postgres oauth_issuer={} oauth_client_id=f02c6361-0635".format( + issuer + ), + "validator is used for test", + expected_stderr=_VISIT, + log_like=[r"connection authorized"], + ) + node.connect_fails( + "user=testalt dbname=postgres oauth_issuer={}/.well-known/oauth-authorization-server/alternate oauth_client_id=f02c6361-0636".format( + issuer + ), + "fail_validator is used for testalt", + 
expected_stderr=r"FATAL:\s+fail_validator: sentinel error", + ) + return offset + + +def _phase_magic_validator(node, issuer, offset): + """The ABI magic-marker mismatch is detected at module load.""" + node.append_conf("oauth_validator_libraries = 'magic_validator'\n") + (node.datadir / "pg_hba.conf").unlink() + node.append_conf( + "\n" + 'local all test oauth validator=magic_validator issuer="{0}" scope="openid postgres"\n'.format( + issuer + ), + filename="pg_hba.conf", + ) + node.restart() + node.wait_for_log(r"ready to accept connections", offset) + + node.connect_fails( + "user=test dbname=postgres oauth_issuer={}/.well-known/oauth-authorization-server/alternate oauth_client_id=f02c6361-0636".format( + issuer + ), + "magic_validator is used", + expected_stderr=r'FATAL:\s+OAuth validator module "magic_validator": magic number mismatch', + ) + + +def test_001_server(create_pg, webserver): # pylint: disable=too-many-locals + """End-to-end OAuth server-side, HBA, and validator behavior.""" + node = create_pg("primary", start=False) + _setup_node(node) + + bgconn = node.background_psql() + + port = webserver.port + cert_dir = os.environ["cert_dir"] + alternative_ca = os.path.join(cert_dir, "root+server_ca.crt") + + try: + # First confirm HTTP / untrusted HTTPS are refused. + http_issuer = "http://127.0.0.1:{}".format(port) + (node.datadir / "pg_hba.conf").unlink() + node.append_conf( + '\nlocal all test oauth issuer="{}" scope="openid postgres"\n'.format( + http_issuer + ), + filename="pg_hba.conf", + ) + offset = _wait_reload(node, 0) + _phase_http_rejected(node, http_issuer) + + # Switch to HTTPS for the remainder of the test. 
+ issuer = "https://127.0.0.1:{}".format(port) + offset = _phase_https_hba(node, issuer, bgconn, offset) + _phase_ca_handling(node, issuer, alternative_ca) + _phase_alternate_and_require_auth(node, issuer) + _phase_vschars(node, issuer) + + common = "user=testparam dbname=postgres oauth_issuer={}/param ".format(issuer) + _phase_param_basics(node, common) + _phase_param_content_type(node, common) + _phase_param_bad_responses(node, common) + _phase_param_token_errors(node, common) + _phase_client_secret(node, common) + _phase_call_count(node, common) + _phase_stress_async(node, common) + + # The validator-reconfiguration phases hardcode the discovery URI and an + # empty scope to keep the logs uncluttered. + common = ( + "dbname=postgres oauth_issuer={}/.well-known/openid-configuration " + "oauth_scope='' oauth_client_id=f02c6361-0635".format(issuer) + ) + offset = _phase_validator_failshut(node, bgconn, common, offset) + offset = _phase_bad_hba_option(node, bgconn, common, offset) + offset = _phase_user_mapping(node, bgconn, common, issuer, offset) + + bgconn.quit() # the remaining phases restart the server + + offset = _phase_validator_hba_options(node, common, issuer, offset) + offset = _phase_multiple_validators(node, issuer, offset) + _phase_magic_validator(node, issuer, offset) + node.stop() + finally: + os.environ.pop("PGOAUTHCAFILE", None) + os.environ.pop("PGOAUTHDEBUG", None) diff --git a/src/test/modules/oauth_validator/pyt/test_002_client.py b/src/test/modules/oauth_validator/pyt/test_002_client.py new file mode 100644 index 0000000000000..c1ea50cab997a --- /dev/null +++ b/src/test/modules/oauth_validator/pyt/test_002_client.py @@ -0,0 +1,307 @@ +# Copyright (c) 2025-2026, PostgreSQL Global Development Group + +# pylint: disable=line-too-long +"""Port of src/test/modules/oauth_validator/t/002_client.pl. + +Exercises the API for custom OAuth client flows, using the oauth_hook_client +test driver. 
These tests do not use the builtin flow and do not contact a real +authorization server, so the issuer address is an invalid IP (any accidental +connection attempt then fails noisily). Gated behind PG_TEST_EXTRA=oauth. +""" + +import contextlib +import os + +import pytest + +import pypg + +pytestmark = pypg.require_test_extras("oauth") + +_ISSUER = "https://256.256.256.256" +_SCOPE = "openid postgres" +_USER = "test" + + +@contextlib.contextmanager +def _env(**overrides): + """Temporarily set environment variables (mirrors Perl local $ENV{...}).""" + saved = {k: os.environ.get(k) for k in overrides} + os.environ.update({k: str(v) for k, v in overrides.items()}) + try: + yield + finally: + for key, value in saved.items(): + if value is None: + os.environ.pop(key, None) + else: + os.environ[key] = value + + +@pytest.fixture(scope="module") +def oauth_node(create_pg_module): + """A server configured for the custom-flow client tests. + + Mirrors the cluster setup at the top of 002_client.pl: the validator + library is loaded, connection logging is on, debug2 messages are enabled so + connection failures can be inspected in the log, and pg_hba grants the test + user OAuth auth for the (unreachable) issuer. + """ + node = create_pg_module("primary", start=False) + node.append_conf("log_connections = all\n") + node.append_conf("oauth_validator_libraries = 'validator'\n") + node.append_conf("log_min_messages = debug2") + node.start() + + node.safe_psql("CREATE USER test;") + + (node.datadir / "pg_hba.conf").unlink() + node.append_conf( + 'local all test oauth issuer="{}" scope="{}"\n'.format(_ISSUER, _SCOPE), + filename="pg_hba.conf", + ) + node.reload() + node.wait_for_log(r"reloading configuration files") + + with _env(PGOAUTHDEBUG="UNSAFE"): + yield node + + +def _run_hook_client(node, common_connstr, flags=None): + """Run oauth_hook_client with the given flags and connstr. 
+ + Returns (CommandResult, log_start) where log_start is the server log offset + captured immediately before the run, for later log_check assertions. + """ + cmd = ["oauth_hook_client"] + list(flags or []) + [common_connstr] + log_start = node.current_log_position() + result = node.bin.run_command(cmd) + return result, log_start + + +def _check( + node, + result, + log_start, + test_name, + *, + expect_success=False, + expected_stderr=None, + log_like=None, +): + """Apply the stdout/stderr/log assertions of 002_client.pl's test().""" + if expect_success: + assert "connection succeeded" in result.stdout, "{}: stdout matches\n{}".format( + test_name, result.stdout + ) + + if expected_stderr is not None: + import re + + assert re.search( + expected_stderr, result.stderr + ), "{}: stderr matches {!r}\n{}".format( + test_name, expected_stderr, result.stderr + ) + else: + assert result.stderr == "", "{}: no stderr, got {!r}".format( + test_name, result.stderr + ) + + if log_like is not None: + # Wait for the postmaster to flush the finished connection's log, to + # avoid races (see Cluster::connect_fails()). 
+ node.wait_for_log( + r"(?s)DEBUG: (?:00000: )?forked new client backend, pid=(\d+) socket" + r".*DEBUG: (?:00000: )?client backend \(PID \1\) exited with exit code \d", + log_start, + ) + node.log_check(test_name, log_start, log_like=log_like) + + +def test_basic_synchronous_hook_provides_token(oauth_node): + """A basic v2 synchronous hook can provide a token.""" + connstr = "{} user={} oauth_issuer={} oauth_client_id=myID".format( + oauth_node.connstr(), _USER, _ISSUER + ) + result, log_start = _run_hook_client( + oauth_node, + connstr, + flags=[ + "--token", + "my-token", + "--expected-uri", + "{}/.well-known/openid-configuration".format(_ISSUER), + "--expected-issuer", + _ISSUER, + "--expected-scope", + _SCOPE, + ], + ) + _check( + oauth_node, + result, + log_start, + "basic synchronous hook can provide a token", + expect_success=True, + log_like=[r'oauth_validator: token="my-token", role="{}"'.format(_USER)], + ) + + +def test_derived_issuer_id_provided(oauth_node): + """The issuer ID provided to the hook is derived from oauth_issuer.""" + connstr = ( + "{} user={} oauth_issuer={}/.well-known/openid-configuration " + "oauth_client_id=myID oauth_scope='{}'" + ).format(oauth_node.connstr(), _USER, _ISSUER, _SCOPE) + result, log_start = _run_hook_client( + oauth_node, + connstr, + flags=[ + "--token", + "my-token", + "--expected-uri", + "{}/.well-known/openid-configuration".format(_ISSUER), + "--expected-issuer", + _ISSUER, + "--expected-scope", + _SCOPE, + ], + ) + _check( + oauth_node, + result, + log_start, + "derived issuer ID is correctly provided", + expect_success=True, + log_like=[r'oauth_validator: token="my-token", role="{}"'.format(_USER)], + ) + + +def test_v1_synchronous_hook_provides_token(oauth_node): + """The v1 synchronous hook continues to work.""" + connstr = "{} user={} oauth_issuer={} oauth_client_id=myID".format( + oauth_node.connstr(), _USER, _ISSUER + ) + result, log_start = _run_hook_client( + oauth_node, + connstr, + flags=[ + "-v1", + 
"--token", + "my-token-v1", + "--expected-uri", + "{}/.well-known/openid-configuration".format(_ISSUER), + "--expected-scope", + _SCOPE, + ], + ) + _check( + oauth_node, + result, + log_start, + "v1 synchronous hook can provide a token", + expect_success=True, + log_like=[r'oauth_validator: token="my-token-v1", role="{}"'.format(_USER)], + ) + + +def test_fails_without_custom_hook_when_no_libcurl(oauth_node): + """Without a custom hook and without libcurl, libpq points at libpq-oauth.""" + if os.environ.get("with_libcurl") == "yes": + pytest.skip("builtin flow is available; no-hook fallback message not emitted") + connstr = "{} user={} oauth_issuer={} oauth_client_id=myID".format( + oauth_node.connstr(), _USER, _ISSUER + ) + result, log_start = _run_hook_client(oauth_node, connstr, flags=["--no-hook"]) + _check( + oauth_node, + result, + log_start, + "fails without custom hook installed", + expected_stderr=r"no OAuth flows are available \(try installing the libpq-oauth package\)", + ) + + +def test_synchronous_hook_sets_error_message(oauth_node): + """A v2 synchronous flow can set a custom error message.""" + connstr = "{} user={} oauth_issuer={} oauth_client_id=myID".format( + oauth_node.connstr(), _USER, _ISSUER + ) + result, log_start = _run_hook_client( + oauth_node, connstr, flags=["--error", "a custom error message"] + ) + _check( + oauth_node, + result, + log_start, + "basic synchronous hook can set error messages", + expected_stderr=r"user-defined OAuth flow failed: a custom error message", + ) + + +def test_connect_timeout_interrupts_hung_flow(oauth_node): + """connect_timeout interrupts a client flow that never responds.""" + connstr = ( + "{} user={} oauth_issuer={} oauth_client_id=myID connect_timeout=1" + ).format(oauth_node.connstr(), _USER, _ISSUER) + result, log_start = _run_hook_client(oauth_node, connstr, flags=["--hang-forever"]) + _check( + oauth_node, + result, + log_start, + "connect_timeout interrupts hung client flow", + 
expected_stderr=r"failed: timeout expired", + ) + + +_MISBEHAVE_CASES = [ + ( + "--misbehave=no-hook", + r"user-defined OAuth flow provided neither a token nor an async callback", + ), + ("--misbehave=fail-async", r"user-defined OAuth flow failed"), + ("--misbehave=no-token", r"user-defined OAuth flow did not provide a token"), + ( + "--misbehave=no-socket", + r"user-defined OAuth flow did not provide a socket for polling", + ), +] + + +@pytest.mark.parametrize("flag,expected_error", _MISBEHAVE_CASES) +@pytest.mark.parametrize("v1", [False, True]) +def test_hook_misbehavior(oauth_node, flag, expected_error, v1): + """Each client-hook misbehavior is reported, for both v1 and v2 hooks.""" + connstr = "{} user={} oauth_issuer={} oauth_client_id=myID".format( + oauth_node.connstr(), _USER, _ISSUER + ) + flags = ["-v1", flag] if v1 else [flag] + suffix = " (v1)" if v1 else "" + result, log_start = _run_hook_client(oauth_node, connstr, flags=flags) + _check( + oauth_node, + result, + log_start, + "hook misbehavior: {}{}".format(flag, suffix), + expected_stderr=expected_error, + ) + + +def test_async_hook_sets_error_message(oauth_node): + """A v2 async flow can also set a custom error message.""" + connstr = "{} user={} oauth_issuer={} oauth_client_id=myID".format( + oauth_node.connstr(), _USER, _ISSUER + ) + result, log_start = _run_hook_client( + oauth_node, + connstr, + flags=["--misbehave", "fail-async", "--error", "async error message"], + ) + _check( + oauth_node, + result, + log_start, + "asynchronous hook can set error messages", + expected_stderr=r"user-defined OAuth flow failed: async error message", + ) diff --git a/src/test/modules/ssl_passphrase_callback/meson.build b/src/test/modules/ssl_passphrase_callback/meson.build index 1b4078c037e55..35ca9f0890fa2 100644 --- a/src/test/modules/ssl_passphrase_callback/meson.build +++ b/src/test/modules/ssl_passphrase_callback/meson.build @@ -54,4 +54,10 @@ tests += { ], 'env': {'with_ssl': 'openssl'}, }, + 'pytest': { + 
def test_001_testfunc(create_pg):
    """Check passphrase handling and warnings of the ssl_passphrase_func module.

    In order: the server starts with the correct passphrase GUC; with
    ssl_passphrase_command also set it logs that the setting is ignored;
    "pg_ctl start" fails with a wrong passphrase; and, unless the build lacks
    HAVE_SSL_CTX_SET_CERT_CB, enabling SNI logs exactly one "TLS init hook
    will be ignored" warning.  Skipped unless with_ssl is "openssl".
    """
    if os.environ.get("with_ssl") != "openssl":
        pytest.skip("OpenSSL not supported by this build")

    # A build without this define is treated as LibreSSL further down.
    have_cert_cb = pypg.check_pg_config(r"#define HAVE_SSL_CTX_SET_CERT_CB 1")

    node = create_pg("main", start=False)
    passphrase = "SbbOnE1"
    for setting in (
        "ssl_passphrase.passphrase = '{}'".format(passphrase),
        "shared_preload_libraries = 'ssl_passphrase_func'",
        "ssl = 'on'",
    ):
        node.append_conf(setting)

    ddir = node.datadir
    for cert_file in ("server.crt", "server.key"):
        shutil.copy(os.path.join(_SRCDIR, cert_file), ddir)
    os.chmod(os.path.join(ddir, "server.key"), 0o600)
    pidfile = "{}/postmaster.pid".format(ddir)

    # Correct passphrase: the server must come up.
    node.start()
    assert os.path.exists(pidfile), "postgres started"
    node.stop("fast")

    # With ssl_passphrase_command set too, the module warns that it is ignored.
    log = node.rotate_logfile()
    node.append_conf("ssl_passphrase_command = 'echo spl0tz'")
    node.start()
    node.stop("fast")
    ignored_warning = re.search(
        r'WARNING.*"ssl_passphrase_command" setting ignored by '
        r"ssl_passphrase_func module",
        pypg.slurp_file(log),
    )
    assert ignored_warning, "ssl_passphrase_command set warning"

    # Wrong passphrase: pg_ctl start must fail and leave no pid file behind.
    node.append_conf("ssl_passphrase.passphrase = 'blurfl'")
    start_cmd = [
        "pg_ctl",
        "--pgdata",
        str(node.datadir),
        "--log",
        str(node.log),
        "start",
    ]
    result = node.bin.run_command(start_cmd)
    assert result.rc != 0, "pg_ctl fails with bad passphrase"
    assert not os.path.exists(pidfile), "postgres not started with bad passphrase"
    node.stop("fast")

    if not have_cert_cb:
        return

    # SNI enabled: the installed TLS init hook is ignored, with one warning.
    node.append_conf("\nssl_passphrase_command = 'echo FooBaR1'\nssl_sni = on\n")
    host_entry = '{host} "{d}/server.crt" "{d}/server.key" "" "echo FooBaR1" on\n'
    node.append_conf(
        "".join(
            host_entry.format(host=host, d=ddir)
            for host in ("example.org", "example.com")
        ),
        "pg_hosts.conf",
    )
    node.start()
    assert os.path.exists(pidfile), "postgres started after SNI"
    node.stop("fast")

    log_contents = pypg.slurp_file(log)
    sni_warning = re.search(
        r"WARNING.*SNI is enabled; installed TLS init hook will be ignored",
        log_contents,
    )
    assert sni_warning, "server warns that init hook and SNI are incompatible"
    hook_warnings = re.findall(
        r"installed TLS init hook will be ignored", log_contents
    )
    assert len(hook_warnings) == 1, "Only one WARNING"
'yes' : 'no', diff --git a/src/test/modules/test_aio/pyt/test_001_aio.py b/src/test/modules/test_aio/pyt/test_001_aio.py new file mode 100644 index 0000000000000..2d543de4c3475 --- /dev/null +++ b/src/test/modules/test_aio/pyt/test_001_aio.py @@ -0,0 +1,1530 @@ +# Copyright (c) 2025-2026, PostgreSQL Global Development Group + +"""Port of src/test/modules/test_aio/t/001_aio.pl. + +Exercises the test_aio extension's read/write paths across every supported +io_method: the IO-handle and batchmode APIs, invalid-page and checksum-failure +reporting, StartBufferIO/TerminateBufferIO interplay, foreign-IO completion, +FD-close handling, relation invalidation during IO, ZERO_ON_ERROR / +zero_damaged_pages, cross-database CREATE DATABASE checksum accounting, +StartReadBuffers(), and -- when the build has injection points -- hard IO +errors, short reads and worker-reopen failures. +""" + +# pylint: disable=too-many-lines + +import os +import re + +import testaio # pyrefly: ignore + + +def _psql_like(psql, sql, expected_stdout, expected_stderr): + """Run sql on a background psql and match its stdout/stderr (cf. psql_like). + + Returns the statement's stdout. Mirrors the Perl psql_like helper: the + statement's stdout must match expected_stdout, its stderr must match + expected_stderr, and the live stderr buffer is then cleared. + """ + output = psql.query(sql) + assert re.search(expected_stdout, output), "expected stdout {!r}, got {!r}".format( + expected_stdout, output + ) + assert re.search( + expected_stderr, psql.last_stderr + ), "expected stderr {!r}, got {!r}".format(expected_stderr, psql.last_stderr) + psql.clear() + return output + + +def _query_wait_block(node, psql, sql, waitfor, wait_current_session): + """Issue sql, then wait for waitfor to be observed (cf. query_wait_block). + + If wait_current_session is true, wait for the event in the issuing session, + otherwise wait for any session. 
+ """ + pid = psql.query_safe("SELECT pg_backend_pid()") + + psql.send("{};\n".format(sql)) + + if wait_current_session: + waitquery = "SELECT wait_event FROM pg_stat_activity WHERE pid = {}".format(pid) + else: + waitquery = ( + "SELECT wait_event FROM pg_stat_activity " + "WHERE wait_event = '{}'".format(waitfor) + ) + + assert node.poll_query_until(waitquery, waitfor) + + +def _checksum_failures(psql, datname=None): + """Return (count, last_failure) for datname, or shared rels if None. + + Mirrors the Perl checksum_failures helper. + """ + if datname is not None: + checksum_count = psql.query_safe( + "\nSELECT checksum_failures FROM pg_stat_database " + "WHERE datname = '{}';\n".format(datname) + ) + checksum_last_failure = psql.query_safe( + "\nSELECT checksum_last_failure FROM pg_stat_database " + "WHERE datname = '{}';\n".format(datname) + ) + else: + checksum_count = psql.query_safe( + "\nSELECT checksum_failures FROM pg_stat_database " + "WHERE datname IS NULL;\n" + ) + checksum_last_failure = psql.query_safe( + "\nSELECT checksum_last_failure FROM pg_stat_database " + "WHERE datname IS NULL;\n" + ) + return checksum_count, checksum_last_failure + + +def _test_handle(node): + """Sanity checks for the IO handle API.""" + psql = node.background_psql("postgres", on_error_stop=False) + + # leak warning: implicit xact + _psql_like(psql, "SELECT handle_get()", r"^$", r"leaked AIO handle") + + # leak warning: explicit xact + _psql_like(psql, "BEGIN; SELECT handle_get(); COMMIT", r"^$", r"leaked AIO handle") + + # leak warning: explicit xact, rollback + _psql_like( + psql, "BEGIN; SELECT handle_get(); ROLLBACK;", r"^$", r"leaked AIO handle" + ) + + # leak warning: subtrans + _psql_like( + psql, + "BEGIN; SAVEPOINT foo; SELECT handle_get(); COMMIT;", + r"^$", + r"leaked AIO handle", + ) + + # leak warning + error: released in different command (thus resowner) + _psql_like( + psql, + "BEGIN; SELECT handle_get(); SELECT handle_release_last(); COMMIT;", + r"^$", + 
def _test_batchmode(node):
    """Run the sanity checks for the batchmode API on a background psql."""
    psql = node.background_psql("postgres", on_error_stop=False)

    # In a build with RELCACHE_FORCE_RELEASE and CATCACHE_FORCE_RELEASE, just
    # using SELECT batch_start() causes spurious test failures, because the
    # lookup of the type information when printing the result tuple also starts
    # a batch. The easiest way around is to not print a result tuple.
    start_batch = "SELECT WHERE batch_start() IS NULL"
    open_batch_warning = r"open AIO batch at end"

    # XXX: the "explicit xact, rollback" variant is not exercised. It doesn't
    # fail right now, due to not getting a chance to do something at
    # transaction command commit. That's not a correctness issue, it just
    # means it's a bit harder to find buggy code.
    cases = (
        # leak warning & recovery: implicit xact
        (start_batch, open_batch_warning),
        # leak warning & recovery: explicit xact
        ("BEGIN; {}; COMMIT;".format(start_batch), open_batch_warning),
        # no warning, batch closed in same command
        (
            "{} UNION ALL SELECT WHERE batch_end() IS NULL".format(start_batch),
            r"^$",
        ),
    )
    for sql, stderr_pattern in cases:
        _psql_like(psql, sql, r"^$", stderr_pattern)

    psql.quit()
normal table.""" + # create a buffer we can play around with + buf_id = _psql_like( + psql_a, "SELECT buffer_create_toy('tbl_ok', 1)", r"^\d+$", r"^$" + ) + + # check that one backend can perform StartBufferIO + _psql_like( + psql_a, + "SELECT buffer_call_start_io({}, for_input=>true, wait=>true);".format(buf_id), + r"^t$", + r"^$", + ) + + # but not twice on the same buffer (non-waiting) + _psql_like( + psql_a, + "SELECT buffer_call_start_io({}, for_input=>true, wait=>false);".format(buf_id), + r"^f$", + r"^$", + ) + _psql_like( + psql_b, + "SELECT buffer_call_start_io({}, for_input=>true, wait=>false);".format(buf_id), + r"^f$", + r"^$", + ) + + # start io in a different session, will block + _query_wait_block( + node, + psql_b, + "SELECT buffer_call_start_io({}, for_input=>true, wait=>true);".format(buf_id), + "BufferIo", + 1, + ) + + # Terminate the IO, without marking it as success, this should trigger the + # waiting session to be able to start the io + _psql_like( + psql_a, + "SELECT buffer_call_terminate_io({}, for_input=>true, succeed=>false, " + "io_error=>false, release_aio=>false)".format(buf_id), + r"^$", + r"^$", + ) + + # Because the IO was terminated, but not marked as valid, second session + # should get the right to start io + psql_b.query_until(r"t") + psql_b.clear() + + # terminate the IO again + psql_b.query_safe( + "SELECT buffer_call_terminate_io({}, for_input=>true, succeed=>false, " + "io_error=>false, release_aio=>false);".format(buf_id) + ) + + # same as the above scenario, but mark IO as having succeeded + _psql_like( + psql_a, + "SELECT buffer_call_start_io({}, for_input=>true, wait=>true);".format(buf_id), + r"^t$", + r"^$", + ) + + # start io in a different session, will block + _query_wait_block( + node, + psql_b, + "SELECT buffer_call_start_io({}, for_input=>true, wait=>true);".format(buf_id), + "BufferIo", + 1, + ) + + # Terminate the IO, marking it as success + _psql_like( + psql_a, + "SELECT buffer_call_terminate_io({}, 
def _startwait_temp(node, psql_a):  # pylint: disable=unused-argument
    """Exercise StartLocalBufferIO behaviour on a temporary table's buffer."""
    start_io_sql = "SELECT buffer_call_start_io({}, for_input=>true, wait=>{});"
    terminate_io_sql = (
        "SELECT buffer_call_terminate_io({}, for_input=>true, succeed=>{}, "
        "io_error=>false, release_aio=>false);"
    )

    # create a buffer we can play around with
    psql_a.query_safe(
        "\n"
        "CREATE TEMPORARY TABLE tmp_ok(data int not null);\n"
        "INSERT INTO tmp_ok SELECT generate_series(1, 10000);\n"
    )
    buf_id = psql_a.query_safe("SELECT buffer_create_toy('tmp_ok', 3);")

    def expect_start_io(wait, result_pattern):
        """Call buffer_call_start_io and match its result, expecting no stderr."""
        _psql_like(psql_a, start_io_sql.format(buf_id, wait), result_pattern, r"^$")

    def terminate_io(succeed):
        """Call buffer_call_terminate_io with the given succeed flag."""
        psql_a.query_safe(terminate_io_sql.format(buf_id, succeed))

    # check that one backend can perform StartLocalBufferIO
    expect_start_io("false", r"^t$")

    # Because local buffers don't use IO_IN_PROGRESS, a second
    # StartLocalBufferIO succeeds as well. This test mostly serves as a
    # documentation of that fact. If we had actually started IO, it'd be
    # different.
    expect_start_io("false", r"^t$")

    # Terminate the IO again, without marking it as a success
    terminate_io("false")
    expect_start_io("false", r"^t$")

    # Terminate the IO again, marking it as a success
    terminate_io("true")

    # Now another StartLocalBufferIO should fail, this time because the buffer
    # is already valid.
    expect_start_io("true", r"^f$")
def _test_close_fd(node):
    """Check that FDs being closed while IO is in progress is handled correctly.

    Issues read_rel_block_ll() with smgrreleaseall=>true inside a batch, once
    waiting for completion and once not, then reads the block normally.
    """
    psql = node.background_psql("postgres", on_error_stop=False)

    read_sql = (
        "\n"
        "\t\t\tSELECT read_rel_block_ll('tbl_ok', 1,\n"
        "\t\t\t\twait_complete=>{},\n"
        "\t\t\t\tbatchmode_enter=>true,\n"
        "\t\t\t\tsmgrreleaseall=>true,\n"
        "\t\t\t\tbatchmode_exit=>true\n"
        "\t\t\t);"
    )
    for wait_complete in ("true", "false"):
        _psql_like(psql, read_sql.format(wait_complete), r"^$", r"^$")

    # Check that another backend can read the relevant block
    _psql_like(
        psql,
        "SELECT count(*) FROM tbl_ok WHERE ctid = '(1,1)' LIMIT 1",
        r"^1$",
        r"^$",
    )

    psql.quit()
invalidate_rel_block('tbl_ok', 2);") + _psql_like( + psql, + "SELECT count(*) FROM tbl_ok WHERE ctid = '(2, 1)'", + r"^$", + r'ERROR:.*could not read blocks 2\.\.2 in file "base/.*": ' + r"read only 0 of 8192 bytes", + ) + + # shorten multi-block read to a single block, should retry + inval_query = ( + "SELECT invalidate_rel_block('tbl_ok', 0);\n" + "SELECT invalidate_rel_block('tbl_ok', 1);\n" + "SELECT invalidate_rel_block('tbl_ok', 2);\n" + "SELECT invalidate_rel_block('tbl_ok', 3);\n" + "/* gap */\n" + "SELECT invalidate_rel_block('tbl_ok', 5);\n" + "SELECT invalidate_rel_block('tbl_ok', 6);\n" + "SELECT invalidate_rel_block('tbl_ok', 7);\n" + "SELECT invalidate_rel_block('tbl_ok', 8);" + ) + + psql.query_safe(inval_query) + psql.query_safe("SELECT inj_io_short_read_attach(8192);") + _psql_like(psql, "SELECT count(*) FROM tbl_ok", r"^10000$", r"^$") + + # shorten multi-block read to two blocks, should retry + psql.query_safe(inval_query) + psql.query_safe("SELECT inj_io_short_read_attach(8192*2);") + _psql_like(psql, "SELECT count(*) FROM tbl_ok", r"^10000$", r"^$") + + # verify that page verification errors are detected even as part of a + # shortened multi-block read (tbl_corr, block 1 is corrupted) + psql.query_safe( + "\n" + "SELECT invalidate_rel_block('tbl_corr', 0);\n" + "SELECT invalidate_rel_block('tbl_corr', 1);\n" + "SELECT invalidate_rel_block('tbl_corr', 2);\n" + "SELECT inj_io_short_read_attach(8192);\n" + "\t" + ) + _psql_like( + psql, + "SELECT count(*) FROM tbl_corr WHERE ctid < '(2, 1)'", + r"^$", + r'ERROR:.*invalid page in block 1 of relation "base/.*', + ) + + # trigger a hard error, should error out + psql.query_safe( + "\n" + "SELECT inj_io_short_read_attach(-errno_from_string('EIO'));\n" + "SELECT invalidate_rel_block('tbl_ok', 2);\n" + "\t" + ) + hard_eio = ( + r'ERROR:.*could not read blocks 2\.\.2 in file "base/.*": ' + r"(?:I/O|Input/output) error" + ) + _psql_like(psql, "SELECT count(*) FROM tbl_ok", r"^$", hard_eio) + 
def _test_inject_worker(node):
    """Injection-point test for io_method=worker: failure to reopen a file."""
    psql = node.background_psql("postgres", on_error_stop=False)
    count_sql = "SELECT count(*) FROM tbl_ok"
    reopen_failure = (
        r'ERROR:.*could not read blocks 1\.\.1 in file "base/.*": '
        r"No such file or directory"
    )

    # trigger a failure to reopen, should error out, but should recover
    psql.query_safe(
        "\nSELECT inj_io_reopen_attach();\n"
        "SELECT invalidate_rel_block('tbl_ok', 1);\n\t"
    )
    _psql_like(psql, count_sql, r"^$", reopen_failure)

    psql.query_safe("SELECT inj_io_reopen_detach();")

    # check that we indeed recover
    _psql_like(psql, count_sql, r"^10000$", r"^$")

    psql.quit()
def _test_zero(node):
    """Exercise ZERO_ON_ERROR and zero_damaged_pages behaviour.

    Runs _zero_one_persistency() for a normal and then a temporary table,
    sharing two background psql sessions between the passes.
    """
    psql_a = node.background_psql("postgres", on_error_stop=False)
    psql_b = node.background_psql("postgres", on_error_stop=False)

    # (persistency label, keyword placed in CREATE ... TABLE)
    passes = (("normal", ""), ("temporary", "temporary"))
    for persistency, sql_persistency in passes:
        _zero_one_persistency(psql_a, psql_b, persistency, sql_persistency)

    psql_a.clear()

    psql_a.quit()
    psql_b.quit()
def _zero_multiblock(psql_a, persistency):  # pylint: disable=unused-argument
    """Check a single read that reports several invalid blocks.

    Corrupts the headers of blocks 2 and 3 of tbl_zero, then reads blocks
    1..4 three ways: expecting an error, zeroing via zero_on_error, and
    zeroing via zero_damaged_pages.
    """
    read_sql = (
        "SELECT read_rel_block_ll('tbl_zero', 1, nblocks=>4, zero_on_error=>{})"
    )
    zeroing_warning = (
        r"^psql::\d+: WARNING: zeroing out 2 invalid pages among "
        r'blocks 1..4 of relation "base/.*/.*\nDETAIL: Block 2 held the first '
        r"zeroed page\.\nHINT:[^\n]+$"
    )

    psql_a.query_safe(
        "\nSELECT modify_rel_block('tbl_zero', 2, corrupt_header=>true);\n"
        "SELECT modify_rel_block('tbl_zero', 3, corrupt_header=>true);\n"
    )

    # First test error
    _psql_like(
        psql_a,
        read_sql.format("false"),
        r"^$",
        r"^psql::\d+: ERROR: 2 invalid pages among blocks 1..4 of "
        r'relation "base/.*/.*\nDETAIL: Block 2 held the first invalid page\.\n'
        r"HINT:[^\n]+$",
    )

    # Then test zeroing via ZERO_ON_ERROR flag
    _psql_like(psql_a, read_sql.format("true"), r"^$", zeroing_warning)

    # Then test zeroing via zero_damaged_pages
    damaged_pages_sql = (
        "\nBEGIN;\n"
        "SET LOCAL zero_damaged_pages = true;\n"
        "SELECT read_rel_block_ll('tbl_zero', 1, nblocks=>4, zero_on_error=>false)\n"
        "COMMIT;\n"
    )
    _psql_like(psql_a, damaged_pages_sql, r"^$", zeroing_warning)

    psql_a.query_safe("COMMIT")
def _test_checksum(node):
    """Test that we detect checksum failures and report them.

    For a normal table, a temporary table and a shared relation in turn:
    record the checksum failure count, read a block whose checksum was
    corrupted, and check that the read errors out, that the count grew by at
    least one, and that the last-failure timestamp is non-empty.  The
    per-database statistics are read for 'postgres', the shared-relation
    statistics via _checksum_failures() without a database name.
    """
    psql_a = node.background_psql("postgres", on_error_stop=False)

    psql_a.query_safe(
        "\nCREATE TABLE tbl_normal(id int) WITH (AUTOVACUUM_ENABLED = false);\n"
        "INSERT INTO tbl_normal SELECT generate_series(1, 5000);\n"
        "SELECT modify_rel_block('tbl_normal', 3, corrupt_checksum=>true);\n"
        "\n"
        "CREATE TEMPORARY TABLE tbl_temp(id int) WITH (AUTOVACUUM_ENABLED = false);\n"
        "INSERT INTO tbl_temp SELECT generate_series(1, 5000);\n"
        "SELECT modify_rel_block('tbl_temp', 3, corrupt_checksum=>true);\n"
        "SELECT modify_rel_block('tbl_temp', 4, corrupt_checksum=>true);\n"
    )

    # To be able to test checksum failures on shared rels we need a shared rel
    # with invalid pages - which is a bit scary. pg_shseclabel seems like a good
    # bet, as it's not accessed in a default configuration.
    psql_a.query_safe(
        "\nSELECT grow_rel('pg_shseclabel', 4);\n"
        "SELECT modify_rel_block('pg_shseclabel', 2, corrupt_checksum=>true);\n"
        "SELECT modify_rel_block('pg_shseclabel', 3, corrupt_checksum=>true);\n"
    )

    # normal rel
    cs_count_before, _ = _checksum_failures(psql_a, "postgres")
    _psql_like(
        psql_a,
        "\nSELECT read_rel_block_ll('tbl_normal', 3, nblocks=>1, "
        "zero_on_error=>false);",
        r"^$",
        r"^psql::\d+: ERROR: invalid page in block 3 of relation "
        r'"base/\d+/\d+"$',
    )
    cs_count_after, cs_ts_after = _checksum_failures(psql_a, "postgres")
    assert int(cs_count_before) + 1 <= int(cs_count_after), "normal rel checksum count"
    assert cs_ts_after != "", "normal rel checksum timestamp is not null"

    # temp rel
    #
    # Take a fresh baseline here.  Reusing the count from before the normal
    # rel read would let the assertion below pass on the strength of the
    # normal rel failure alone, without the temp rel read being counted.
    cs_count_before, _ = _checksum_failures(psql_a, "postgres")
    _psql_like(
        psql_a,
        "\nSELECT read_rel_block_ll('tbl_temp', 4, nblocks=>2, "
        "zero_on_error=>false);",
        r"^$",
        r"^psql::\d+: ERROR: invalid page in block 4 of relation "
        r'"base/\d+/t\d+_\d+"$',
    )
    cs_count_after, cs_ts_after = _checksum_failures(psql_a, "postgres")
    assert int(cs_count_before) + 1 <= int(cs_count_after), "temp rel checksum count"
    assert cs_ts_after != "", "temp rel checksum timestamp is not null"

    # shared rel
    cs_count_before, _ = _checksum_failures(psql_a)
    _psql_like(
        psql_a,
        "\nSELECT read_rel_block_ll('pg_shseclabel', 2, nblocks=>2, "
        "zero_on_error=>false);",
        r"^$",
        r"^psql::\d+: ERROR: 2 invalid pages among blocks 2..3 of "
        r'relation "global/\d+"\nDETAIL: Block 2 held the first invalid page\.\n'
        r"HINT:[^\n]+$",
    )
    cs_count_after, cs_ts_after = _checksum_failures(psql_a)
    assert int(cs_count_before) + 1 <= int(cs_count_after), "shared rel checksum count"
    assert cs_ts_after != "", "shared rel checksum timestamp is not null"

    # and restore sanity
    #
    # NOTE(review): this zeroes block 1 of pg_shseclabel, while blocks 2 and 3
    # are the ones corrupted above -- confirm that is intended.
    psql_a.query(
        "\nSELECT modify_rel_block('pg_shseclabel', 1, zero=>true);\n"
        "DROP TABLE tbl_normal;\n"
    )
    psql_a.clear()

    psql_a.quit()
+ node.safe_psql( + "\nSELECT modify_rel_block('tbl_cs_fail', 1, zero=>true);\n", + dbname="regression_createdb_source", + ) + _psql_like(psql, createdb_sql, r"^$", r"^$") + + psql.quit() + + +def _ignore_checksum_basic(psql, count_sql, invalidate_sql, expect): + """Very basic ignore_checksum_failure=off / on tests.""" + psql.query_safe( + "\nSELECT modify_rel_block('tbl_cs_fail', 1, corrupt_checksum=>true);\n" + "SELECT modify_rel_block('tbl_cs_fail', 5, corrupt_checksum=>true);\n" + "SELECT modify_rel_block('tbl_cs_fail', 6, corrupt_checksum=>true);\n" + ) + + psql.query_safe(invalidate_sql) + _psql_like(psql, count_sql, r"^$", r"ERROR: invalid page in block") + + psql.query_safe("SET ignore_checksum_failure=on") + + psql.query_safe(invalidate_sql) + _psql_like( + psql, + count_sql, + r"^{}$".format(expect), + r"WARNING: ignoring (checksum failure|\d checksum failures)", + ) + + +def _ignore_checksum_multiblock(node, psql): + """Verify ignore_checksum_failure=off works in multi-block reads.""" + psql.query_safe( + "\nSELECT modify_rel_block('tbl_cs_fail', 2, zero=>true);\n" + "SELECT modify_rel_block('tbl_cs_fail', 3, corrupt_checksum=>true);\n" + "SELECT modify_rel_block('tbl_cs_fail', 4, corrupt_header=>true);\n" + ) + + log_location = node.current_log_position() + _psql_like( + psql, + "\nSELECT read_rel_block_ll('tbl_cs_fail', 3, nblocks=>1, " + "zero_on_error=>false);", + r"^$", + r"^psql::\d+: WARNING: ignoring checksum failure in block 3", + ) + + # Check that the log contains a LOG message about the failure + node.wait_for_log(r"LOG: ignoring checksum failure", log_location) + + # check that we error + _psql_like( + psql, + "\nSELECT read_rel_block_ll('tbl_cs_fail', 2, nblocks=>3, " + "zero_on_error=>false);", + r"^$", + r"^psql::\d+: ERROR: invalid page in block 4 of relation " + r'"base/\d+/\d+"$', + ) + + +def _ignore_checksum_multiproblem(node, psql): + """Multi-block read with different problems in different blocks.""" + psql.query( + "\nSELECT 
modify_rel_block('tbl_cs_fail', 1, zero=>true);\n" + "SELECT modify_rel_block('tbl_cs_fail', 2, corrupt_checksum=>true);\n" + "SELECT modify_rel_block('tbl_cs_fail', 3, corrupt_checksum=>true, " + "corrupt_header=>true);\n" + "SELECT modify_rel_block('tbl_cs_fail', 4, corrupt_header=>true);\n" + "SELECT modify_rel_block('tbl_cs_fail', 5, corrupt_header=>true);\n" + ) + psql.clear() + + log_location = node.current_log_position() + _psql_like( + psql, + "\nSELECT read_rel_block_ll('tbl_cs_fail', 1, nblocks=>5, " + "zero_on_error=>true);", + r"^$", + r"^psql::\d+: WARNING: zeroing 3 page\(s\) and ignoring 2 " + r'checksum failure\(s\) among blocks 1..5 of relation "', + ) + + # Unfortunately have to scan the whole log since determining $log_location + # above in each of the tests, as wait_for_log() returns the size of the + # file. + node.wait_for_log(r"LOG: ignoring checksum failure in block 2", log_location) + node.wait_for_log( + r'LOG: invalid page in block 3 of relation "base.*"; zeroing out page', + log_location, + ) + node.wait_for_log( + r'LOG: invalid page in block 4 of relation "base.*"; zeroing out page', + log_location, + ) + node.wait_for_log( + r'LOG: invalid page in block 5 of relation "base.*"; zeroing out page', + log_location, + ) + + +def _ignore_checksum_both(psql): + """Reading a page with both an invalid header and an invalid checksum.""" + psql.query( + "\nSELECT modify_rel_block('tbl_cs_fail', 3, corrupt_checksum=>true, " + "corrupt_header=>true);\n" + ) + psql.clear() + + _psql_like( + psql, + "\nSELECT read_rel_block_ll('tbl_cs_fail', 3, nblocks=>1, " + "zero_on_error=>false);", + r"^$", + r"^psql::\d+: ERROR: invalid page in block 3 of relation \"", + ) + + _psql_like( + psql, + "\nSELECT read_rel_block_ll('tbl_cs_fail', 3, nblocks=>1, " + "zero_on_error=>true);", + r"^$", + r"^psql::\d+: WARNING: invalid page in block 3 of relation " + r'"base/.*"; zeroing out page', + ) + + +def _test_ignore_checksum(node): + """Test detecting/ignoring 
checksum failures, with per-block log detail.""" + psql = node.background_psql("postgres", on_error_stop=False) + + # Test setup + psql.query_safe( + "\nCREATE TABLE tbl_cs_fail(id int) WITH (AUTOVACUUM_ENABLED = false);\n" + "INSERT INTO tbl_cs_fail SELECT generate_series(1, 10000);\n" + ) + + count_sql = "SELECT count(*) FROM tbl_cs_fail" + invalidate_sql = ( + "\nSELECT invalidate_rel_block('tbl_cs_fail', g.i)\n" + "FROM generate_series(0, 6) g(i);\n" + ) + + expect = psql.query_safe(count_sql) + + _ignore_checksum_basic(psql, count_sql, invalidate_sql, expect) + _ignore_checksum_multiblock(node, psql) + _ignore_checksum_multiproblem(node, psql) + _ignore_checksum_both(psql) + + psql.quit() + + +def _read_buffers_combine( + psql_a, persistency, table +): # pylint: disable=unused-argument + """Combining / hit-splitting cases for read_buffers().""" + # check that consecutive misses are combined into one read + psql_a.query_safe("SELECT evict_rel('{}')".format(table)) + _psql_like( + psql_a, + "SELECT blockoff, blocknum, io_reqd, nblocks FROM " + "read_buffers('{}', 0, 2)".format(table), + r"^0\|0\|t\|2$", + r"^$", + ) + + # but if we do it again, i.e. 
it's in the buffer pool, there will be two + # operations + _psql_like( + psql_a, + "SELECT blockoff, blocknum, io_reqd, nblocks FROM " + "read_buffers('{}', 0, 2)".format(table), + r"^0\|0\|f\|1\n1\|1\|f\|1$", + r"^$", + ) + + # Check that a larger read interrupted by a hit works + _psql_like( + psql_a, + "SELECT blockoff, blocknum, io_reqd, nblocks FROM " + "read_buffers('{}', 3, 1)".format(table), + r"^0\|3\|t\|1$", + r"^$", + ) + _psql_like( + psql_a, + "SELECT blockoff, blocknum, io_reqd, nblocks FROM " + "read_buffers('{}', 2, 4)".format(table), + r"^0\|2\|t\|1\n1\|3\|f\|1\n2\|4\|t\|2$", + r"^$", + ) + + +def _read_buffers_hits(psql_a, table): + """Reads with initial buffer hits / trailing hits, and io_combine_limit.""" + # Verify that a read with an initial buffer hit works + psql_a.query_safe("SELECT evict_rel('{}')".format(table)) + for sql, expected in ( + ("read_buffers('{}', 0, 1)".format(table), r"^0\|0\|t\|1$"), + ("read_buffers('{}', 0, 1)".format(table), r"^0\|0\|f\|1$"), + ("read_buffers('{}', 1, 1)".format(table), r"^0\|1\|t\|1$"), + ("read_buffers('{}', 1, 1)".format(table), r"^0\|1\|f\|1$"), + ("read_buffers('{}', 0, 2)".format(table), r"^0\|0\|f\|1\n1\|1\|f\|1$"), + ( + "read_buffers('{}', 0, 3)".format(table), + r"^0\|0\|f\|1\n1\|1\|f\|1\n2\|2\|t\|1$", + ), + ): + _psql_like( + psql_a, + "SELECT blockoff, blocknum, io_reqd, nblocks FROM " + sql, + expected, + r"^$", + ) + + # Verify that a read with an initial miss and trailing buffer hit(s) works + psql_a.query_safe("SELECT invalidate_rel_block('{}', 0)".format(table)) + _psql_like( + psql_a, + "SELECT blockoff, blocknum, io_reqd, nblocks FROM " + "read_buffers('{}', 0, 3)".format(table), + r"^0\|0\|t\|1\n1\|1\|f\|1\n2\|2\|f\|1$", + r"^$", + ) + psql_a.query_safe("SELECT invalidate_rel_block('{}', 1)".format(table)) + psql_a.query_safe("SELECT invalidate_rel_block('{}', 2)".format(table)) + psql_a.query_safe("SELECT * FROM read_buffers('{}', 3, 2)".format(table)) + _psql_like( + psql_a, + 
"SELECT blockoff, blocknum, io_reqd, nblocks FROM " + "read_buffers('{}', 1, 4)".format(table), + r"^0\|1\|t\|2\n2\|3\|f\|1\n3\|4\|f\|1$", + r"^$", + ) + + # Verify that we aren't doing reads larger than io_combine_limit. That's + # just enforced in read_buffers() function, but kinda still worth testing. + psql_a.query_safe("SELECT evict_rel('{}')".format(table)) + psql_a.query_safe("SET io_combine_limit=3") + _psql_like( + psql_a, + "SELECT blockoff, blocknum, io_reqd, nblocks FROM " + "read_buffers('{}', 1, 5)".format(table), + r"^0\|1\|t\|3\n3\|4\|t\|2$", + r"^$", + ) + psql_a.query_safe("RESET io_combine_limit") + + +def _read_buffers_inprogress(psql_a, table): + """Encountering in-progress IO at the start/middle/end of the range.""" + # Test encountering buffer IO we started in the first block of the range. + # + # Depending on how quick the IO we start completes, the IO might be + # completed or we "join" the foreign IO. To hide that variability, the query + # below treats a foreign IO as not having needed to do IO. 
+ psql_a.query_safe("SELECT evict_rel('{}')".format(table)) + psql_a.query_safe( + "SELECT read_rel_block_ll('{}', 1, wait_complete=>false)".format(table) + ) + _psql_like( + psql_a, + "SELECT blockoff, blocknum, io_reqd and not foreign_io, nblocks FROM " + "read_buffers('{}', 1, 3)".format(table), + r"^0\|1\|f\|1\n1\|2\|t\|2$", + r"^$", + ) + + # Test in-progress IO in the middle block of the range + psql_a.query_safe("SELECT evict_rel('{}')".format(table)) + psql_a.query_safe( + "SELECT read_rel_block_ll('{}', 2, wait_complete=>false)".format(table) + ) + _psql_like( + psql_a, + "SELECT blockoff, blocknum, io_reqd and not foreign_io, nblocks FROM " + "read_buffers('{}', 1, 3)".format(table), + r"^0\|1\|t\|1\n1\|2\|f\|1\n2\|3\|t\|1$", + r"^$", + ) + + # Test in-progress IO on the last block of the range + psql_a.query_safe("SELECT evict_rel('{}')".format(table)) + psql_a.query_safe( + "SELECT read_rel_block_ll('{}', 3, wait_complete=>false)".format(table) + ) + _psql_like( + psql_a, + "SELECT blockoff, blocknum, io_reqd and not foreign_io, nblocks FROM " + "read_buffers('{}', 1, 3)".format(table), + r"^0\|1\|t\|2\n2\|3\|f\|1$", + r"^$", + ) + + +def _read_buffers_split(node, io_method, psql_a, psql_b): + """Start buffer IO splits an IO if there's concurrent IO in progress.""" + table = "tbl_ok" + persistency = "normal" + + # Test start buffer IO will split IO if there's IO in progress. We can't + # observe this with sync, as that does not start the IO operation in + # StartReadBuffers(). 
+ if io_method == "sync": + return + + psql_a.query_safe("SELECT evict_rel('{}')".format(table)) + + buf_id = psql_b.query_safe("SELECT buffer_create_toy('{}', 3)".format(table)) + psql_b.query_safe( + "SELECT buffer_call_start_io({}, for_input=>true, wait=>true)".format(buf_id) + ) + + _query_wait_block( + node, + psql_a, + "SELECT blockoff, blocknum, io_reqd, foreign_io, nblocks FROM " + "read_buffers('{}', 1, 5);\n".format(table), + "BufferIo", + 1, + ) + psql_b.query_safe( + "SELECT buffer_call_terminate_io({}, for_input=>true, succeed=>false, " + "io_error=>false, release_aio=>false)".format(buf_id) + ) + # Because no IO wref was assigned, block 3 should not report foreign IO + psql_a.query_until(r"0\|1\|t\|f\|2\n2\|3\|t\|f\|3") + psql_a.clear() + # {io_method}: {persistency}: IO was split due to concurrent failed IO + + # Same as before, except the concurrent IO succeeds this time + psql_a.query_safe("SELECT evict_rel('{}')".format(table)) + buf_id = psql_b.query_safe("SELECT buffer_create_toy('{}', 3)".format(table)) + psql_b.query_safe( + "SELECT buffer_call_start_io({}, for_input=>true, wait=>true)".format(buf_id) + ) + + _query_wait_block( + node, + psql_a, + "SELECT blockoff, blocknum, io_reqd, foreign_io, nblocks FROM " + "read_buffers('{}', 1, 5);\n".format(table), + "BufferIo", + 1, + ) + psql_b.query_safe( + "SELECT buffer_call_terminate_io({}, for_input=>true, succeed=>true, " + "io_error=>false, release_aio=>false)".format(buf_id) + ) + # Because no IO wref was assigned, block 3 should not report foreign IO + psql_a.query_until(r"0\|1\|t\|f\|2\n2\|3\|f\|f\|1\n3\|4\|t\|f\|2") + psql_a.clear() + assert persistency == "normal" + + +def _test_read_buffers(io_method, node): + """Tests for StartReadBuffers().""" + psql_a = node.background_psql("postgres", on_error_stop=False) + psql_b = node.background_psql("postgres", on_error_stop=False) + + psql_a.query_safe( + "\nCREATE TEMPORARY TABLE tmp_ok(data int not null);\n" + "INSERT INTO tmp_ok SELECT 
generate_series(1, 5000);\n" + ) + + for persistency in ("normal", "temporary"): + table = "tbl_ok" if persistency == "normal" else "tmp_ok" + _read_buffers_combine(psql_a, persistency, table) + _read_buffers_hits(psql_a, table) + _read_buffers_inprogress(psql_a, table) + + # The remaining tests don't make sense for temp tables, as they are + # concerned with multiple sessions interacting with each other. + _read_buffers_split(node, io_method, psql_a, psql_b) + + psql_a.quit() + psql_b.quit() + + +def _read_buffers_inject_one(node, io_method, psqls): + """One foreign-IO read_buffers scenario (single in-progress block).""" + psql_a, psql_b, psql_c = psqls + table = "tbl_ok" + + # Test if a read buffers encounters AIO in progress by another backend, it + # recognizes that other IO as a foreign IO. + psql_a.query_safe("SELECT evict_rel('{}')".format(table)) + + # B: Trigger wait in the next AIO read for block 1. + psql_b.query_safe( + "SELECT inj_io_completion_wait(pid=>pg_backend_pid(),\n" + "\t\t relfilenode=>pg_relation_filenode('{}'),\n" + "\t\t blockno=>1);".format(table) + ) + + # B: Read block 1 and wait for the completion hook to be reached (which + # could be in B itself or in an IO worker) + _query_wait_block( + node, + psql_b, + "SELECT read_rel_block_ll('{}', blockno=>1, nblocks=>1)".format(table), + "completion_wait", + 0, + ) + + # A: Start read, wait until we're waiting for IO completion + _query_wait_block( + node, + psql_a, + "SELECT blockoff, blocknum, io_reqd, foreign_io, nblocks FROM " + "read_buffers('{}', 1, 4)".format(table), + "AioIoCompletion", + 1, + ) + + # C: Release B from completion hook + psql_c.query_safe("SELECT inj_io_completion_continue()") + + # A: Check that we recognized the foreign IO wait, if possible + # + # Due to sync mode not actually issuing IO below StartReadBuffers(), we + # can't observe encountering foreign IO. It still seems worth exercising + # these paths however. 
+ if io_method != "sync": + # A foreign IO covering block 1, and one IO covering blocks 2-4. + expected = r"0\|1\|t\|t\|1\n1\|2\|t\|f\|3" + else: + # One IO covering everything, as that's what StartReadBuffers() will + # return for something with misses in sync mode. + expected = r"0\|1\|t\|f\|4" + psql_a.query_until(expected) + psql_a.clear() + + +def _read_buffers_inject_two(node, io_method, psqls): + """Foreign-IO read_buffers scenario encountered multiple times.""" + psql_a, psql_b, psql_c = psqls + table = "tbl_ok" + + # Test if a read buffers encounters AIO in progress by another backend, it + # recognizes that other IO as a foreign IO. This time we encounter the + # foreign IO multiple times. + psql_a.query_safe("SELECT evict_rel('{}')".format(table)) + + # B: Trigger wait in the next AIO read for block 3. + psql_b.query_safe( + "SELECT inj_io_completion_wait(pid=>pg_backend_pid(),\n" + "\t\t relfilenode=>pg_relation_filenode('{}'),\n" + "\t\t blockno=>3);".format(table) + ) + + # B: Read block 2-3 and wait for the completion hook to be reached (which + # could be in B itself or in an IO worker) + _query_wait_block( + node, + psql_b, + "SELECT read_rel_block_ll('{}', blockno=>2, nblocks=>2)".format(table), + "completion_wait", + 0, + ) + + # A: Start read, wait until we're waiting for IO completion + # + # Note that we need to defer waiting for IO until the end of read_buffers(), + # to be able to see that the IO on 3 is still in progress. + _query_wait_block( + node, + psql_a, + "SELECT blockoff, blocknum, io_reqd, foreign_io, nblocks FROM\n" + "read_buffers('{}', 0, 4)".format(table), + "AioIoCompletion", + 1, + ) + + # C: Release B from completion hook + psql_c.query_safe("SELECT inj_io_completion_continue()") + + # A: Check that we recognized the foreign IO wait, if possible + # + # See comment further up about sync mode. 
+ if io_method != "sync": + # One IO covering blocks 0-1, A foreign IO covering block 2, and a + # foreign IO covering block 3 (same wref as for block 2). + expected = r"0\|0\|t\|f\|2\n2\|2\|t\|t\|1\n3\|3\|t\|t\|1" + else: + # One IO covering everything, as that's what StartReadBuffers() will + # return for something with misses in sync mode. + expected = r"0\|0\|t\|f\|4" + psql_a.query_until(expected) + psql_a.clear() + + +def _test_read_buffers_inject(io_method, node): + """Tests for StartReadBuffers() that depend on injection point support.""" + psql_a = node.background_psql("postgres", on_error_stop=False) + psql_b = node.background_psql("postgres", on_error_stop=False) + psql_c = node.background_psql("postgres", on_error_stop=False) + + # We can't easily test waiting for foreign IOs on temporary tables, as the + # waiting in the completion hook will just stall the backend. For worker + # that is because temporary table IO is executed synchronously, for io_uring + # the completion will be executed in the same process, but due to temporary + # tables not being shared, we can't do the wait in another backend. 
+ psqls = (psql_a, psql_b, psql_c) + _read_buffers_inject_one(node, io_method, psqls) + _read_buffers_inject_two(node, io_method, psqls) + + psql_a.quit() + psql_b.quit() + psql_c.quit() + + +def _test_io_method(io_method, node): + """Run all tests for the specified node / io_method.""" + assert ( + node.safe_psql("SHOW io_method") == io_method + ), "{}: io_method set correctly".format(io_method) + + node.safe_psql( + "\nCREATE EXTENSION test_aio;\n" + "CREATE TABLE tbl_corr(data int not null) " + "WITH (AUTOVACUUM_ENABLED = false);\n" + "CREATE TABLE tbl_ok(data int not null) " + "WITH (AUTOVACUUM_ENABLED = false);\n" + "\n" + "INSERT INTO tbl_corr SELECT generate_series(1, 10000);\n" + "INSERT INTO tbl_ok SELECT generate_series(1, 10000);\n" + "SELECT grow_rel('tbl_corr', 16);\n" + "SELECT grow_rel('tbl_ok', 16);\n" + "\n" + "SELECT modify_rel_block('tbl_corr', 1, corrupt_header=>true);\n" + "CHECKPOINT;\n" + ) + + _test_handle(node) + _test_io_error(node) + _test_batchmode(node) + _test_startwait_io(node) + _test_complete_foreign(node) + _test_close_fd(node) + _test_invalidate(node) + _test_zero(node) + _test_checksum(node) + _test_ignore_checksum(node) + _test_checksum_createdb(node) + _test_read_buffers(io_method, node) + + # generic injection tests + if os.environ.get("enable_injection_points") == "yes": + _test_inject(node) + _test_read_buffers_inject(io_method, node) + + # worker specific injection tests + if io_method == "worker": + if os.environ.get("enable_injection_points") == "yes": + _test_inject_worker(node) + + +def test_001_aio(create_pg): + """Create one node per io_method, configure, and run every test in turn.""" + methods = testaio.supported_io_methods() + nodes = {} + + # Create and configure one instance for each io_method + for method in methods: + node = create_pg(method, start=False) + nodes[method] = node + node.append_conf("io_method={}".format(method)) + testaio.configure(node) + + # Just to have one test not use the default auto-tuning 
+ nodes["sync"].append_conf("\n io_max_concurrency=4\n") + + # Execute the tests for each io_method + for method in methods: + node = nodes[method] + node.start() + _test_io_method(method, node) + node.stop() diff --git a/src/test/modules/test_aio/pyt/test_002_io_workers.py b/src/test/modules/test_aio/pyt/test_002_io_workers.py new file mode 100644 index 0000000000000..6f4e0922fcde3 --- /dev/null +++ b/src/test/modules/test_aio/pyt/test_002_io_workers.py @@ -0,0 +1,105 @@ +# Copyright (c) 2025-2026, PostgreSQL Global Development Group + +"""Port of src/test/modules/test_aio/t/002_io_workers.pl. + +Test changing the number of I/O worker processes while also evaluating the +handling of their termination. +""" + +import random +import re + + +def _check_io_worker_count(node, worker_count): + assert node.poll_query_until( + "SELECT COUNT(*) FROM pg_stat_activity WHERE backend_type = 'io worker'", + str(worker_count), + ), "io worker count is {}".format(worker_count) + + +def _terminate_io_worker(node, worker_count): # pylint: disable=unused-argument + # Select a random io worker. 
+ pid = node.safe_psql( + "SELECT pid FROM pg_stat_activity WHERE\n" + "\t\t\tbackend_type = 'io worker' ORDER BY RANDOM() LIMIT 1" + ) + + # terminate IO worker with SIGINT + node.command_ok( + ["pg_ctl", "kill", "INT", pid], + "random io worker process signalled with INT", + ) + + # Check that worker exits + assert node.poll_query_until( + "SELECT COUNT(*) FROM pg_stat_activity WHERE pid = {}".format(pid), "0" + ), "random io worker process exited after signal" + + +def _change_number_of_io_workers(node, worker_count, prev_worker_count, expect_failure): + result = node.psql_capture( + "ALTER SYSTEM SET io_min_workers = {}".format(worker_count) + ) + node.safe_psql("SELECT pg_reload_conf()") + + if expect_failure: + assert re.search( + r'{} is outside the valid range for parameter "io_min_workers"'.format( + worker_count + ), + result.stderr, + ), "updating io_min_workers to {} failed, as expected".format(worker_count) + return prev_worker_count + + assert node.safe_psql("SHOW io_min_workers") == str( + worker_count + ), "updating number of io_min_workers from {} to {}".format( + prev_worker_count, worker_count + ) + + _check_io_worker_count(node, worker_count) + _terminate_io_worker(node, worker_count) + _check_io_worker_count(node, worker_count) + + return worker_count + + +def _test_number_of_io_workers_dynamic(node): + prev_worker_count = node.safe_psql("SHOW io_min_workers") + + # Verify that worker count can't be set to 0 + _change_number_of_io_workers(node, 0, prev_worker_count, True) + + # Verify that worker count can't be set to 33 (above the max) + _change_number_of_io_workers(node, 33, prev_worker_count, True) + + # Try changing IO workers to a random value and verify that the worker count + # ends up as expected. Always test the min/max of workers. + # + # Valid range for io_workers is [1, 32]. 8 tests in total seems reasonable. 
+ io_workers_range = list(range(1, 33)) + random.shuffle(io_workers_range) + for worker_count in (1, 32, io_workers_range[0], io_workers_range[6]): + prev_worker_count = _change_number_of_io_workers( + node, worker_count, prev_worker_count, False + ) + + +def test_002_io_workers(create_pg): + """Dynamically resize the io worker pool and verify termination handling.""" + node = create_pg("worker", start=False) + node.append_conf( + "\n" + "io_method=worker\n" + "io_worker_idle_timeout=0ms\n" + "io_worker_launch_interval=0ms\n" + "io_max_workers=32\n" + ) + + node.start() + + # Test changing the number of I/O worker processes while also evaluating the + # handling of their termination. + _test_number_of_io_workers_dynamic(node) + + node.stop() diff --git a/src/test/modules/test_aio/pyt/test_003_initdb.py b/src/test/modules/test_aio/pyt/test_003_initdb.py new file mode 100644 index 0000000000000..ef51147cccffd --- /dev/null +++ b/src/test/modules/test_aio/pyt/test_003_initdb.py @@ -0,0 +1,59 @@ +# Copyright (c) 2024-2026, PostgreSQL Global Development Group + +"""Port of src/test/modules/test_aio/t/003_initdb.pl. + +Test initdb for each IO method. This is done separately from 001_aio.pl, as it +isn't fast. This way the more commonly failing / hacked-on 001_aio.pl can be +iterated on more quickly. +""" + +import os + +import testaio # pyrefly: ignore + + +def _test_create_node(io_method, create_pg): + # Want to test initdb for each IO method, otherwise we could just reuse the + # cluster. + # + # Unfortunately, when PG_TEST_INITDB_EXTRA_OPTS contains -c io_method=xyz it + # is applied after our own ->extra options and would break this test. Fix + # that up if we detect it, mirroring the Perl test's local-env override. 
+ extra_opts = os.environ.get("PG_TEST_INITDB_EXTRA_OPTS") + saved = extra_opts + try: + if extra_opts is not None and "io_method=" in extra_opts: + os.environ["PG_TEST_INITDB_EXTRA_OPTS"] = ( + extra_opts + " -c io_method={}".format(io_method) + ) + + node = create_pg( + io_method, extra=["-c", "io_method={}".format(io_method)], start=False + ) + + testaio.configure(node) + + # Even though we used -c io_method=... above, if TEMP_CONFIG sets + # io_method, it'd override the setting persisted at initdb time. While + # using (and later verifying) the setting from initdb provides some + # verification of having used the io_method during initdb, it's probably + # not worth the complication of only appending if the variable is set in + # TEMP_CONFIG. + node.append_conf("\nio_method={}\n".format(io_method)) + + # io_method: initdb + node.start() + node.stop() + # io_method: start & stop + return node + finally: + if saved is None: + os.environ.pop("PG_TEST_INITDB_EXTRA_OPTS", None) + else: + os.environ["PG_TEST_INITDB_EXTRA_OPTS"] = saved + + +def test_003_initdb(create_pg): + """Run initdb + start/stop once per supported io_method.""" + for method in testaio.supported_io_methods(): + _test_create_node(method, create_pg) diff --git a/src/test/modules/test_aio/pyt/test_004_read_stream.py b/src/test/modules/test_aio/pyt/test_004_read_stream.py new file mode 100644 index 0000000000000..c2f3ee0a2b28b --- /dev/null +++ b/src/test/modules/test_aio/pyt/test_004_read_stream.py @@ -0,0 +1,242 @@ +# Copyright (c) 2025-2026, PostgreSQL Global Development Group + +"""Port of src/test/modules/test_aio/t/004_read_stream.pl. + +Exercises read-stream behaviour across io methods: repeatedly missing/hitting +the same blocks (normal and temp tables), and -- when the build has injection +points -- a read stream encountering buffers undergoing IO in another backend +(succeeding, failing, and two buffers in one IO). 
+""" + +import os + +import testaio # pyrefly: ignore + + +def _test_repeated_blocks(io_method, node): + psql = node.background_psql("postgres", on_error_stop=False) + + # Preventing larger reads makes testing easier + psql.query_safe("SET io_combine_limit = 1") + + # test miss of the same block twice in a row + psql.query_safe("SELECT evict_rel('largeish');") + + # block 0 grows the distance enough that the stream will look ahead and try + # to start a pending read for block 2 (and later block 4) twice before + # returning any buffers. + psql.query_safe( + "SELECT * FROM read_stream_for_blocks('largeish',\n" + "\t\t ARRAY[0, 2, 2, 4, 4]);" + ) + # {io_method}: stream missing the same block repeatedly + + psql.query_safe( + "SELECT * FROM read_stream_for_blocks('largeish',\n" + "\t\t ARRAY[0, 2, 2, 4, 4]);" + ) + # {io_method}: stream hitting the same block repeatedly + + # test hit of the same block twice in a row + psql.query_safe("SELECT evict_rel('largeish');") + psql.query_safe( + "SELECT * FROM read_stream_for_blocks('largeish',\n" + "\t\t ARRAY[0, 1, 2, 3, 4, 5, 6, 5, 4, 3, 2, 1, 0]);" + ) + # {io_method}: stream accessing same block + + # Test repeated blocks with a temp table, using invalidate_rel_block() to + # evict individual local buffers. 
+ psql.query_safe( + "CREATE TEMP TABLE largeish_temp(k int not null) WITH (FILLFACTOR=10);\n" + "\t\t INSERT INTO largeish_temp(k) SELECT generate_series(1, 200);" + ) + + # Evict the specific blocks we'll request to force misses + psql.query_safe("SELECT invalidate_rel_block('largeish_temp', 0);") + psql.query_safe("SELECT invalidate_rel_block('largeish_temp', 2);") + psql.query_safe("SELECT invalidate_rel_block('largeish_temp', 4);") + + psql.query_safe( + "SELECT * FROM read_stream_for_blocks('largeish_temp',\n" + "\t\t ARRAY[0, 2, 2, 4, 4]);" + ) + # {io_method}: temp stream missing the same block repeatedly + + # Now the blocks are cached, so repeated access should be hits + psql.query_safe( + "SELECT * FROM read_stream_for_blocks('largeish_temp',\n" + "\t\t ARRAY[0, 2, 2, 4, 4]);" + ) + # {io_method}: temp stream hitting the same block repeatedly + + psql.quit() + + +def _wait_completion_wait(node): + assert node.poll_query_until( + "SELECT wait_event FROM pg_stat_activity\n" + "\t\t\tWHERE wait_event = 'completion_wait';", + "completion_wait", + ) + + +def _foreign_succeeding(node, psql_a, psql_b, pid_a): + # Test read stream encountering buffers undergoing IO in another backend, + # with the other backend's reads succeeding. + psql_a.query_safe("SELECT evict_rel('largeish');") + + psql_b.query_safe( + "SELECT inj_io_completion_wait(pid=>pg_backend_pid(),\n" + "\t\t relfilenode=>pg_relation_filenode('largeish'));" + ) + + psql_b.send("SELECT read_rel_block_ll('largeish',\n\t\tblockno=>5, nblocks=>1);\n") + + _wait_completion_wait(node) + + # Block 5 is undergoing IO in session b, so session a will move on to start + # a new IO for block 7. 
+ psql_a.send( + "SELECT array_agg(blocknum) FROM\n" + "\t\tread_stream_for_blocks('largeish', ARRAY[0, 2, 5, 7]);\n" + ) + + assert node.poll_query_until( + "SELECT wait_event FROM pg_stat_activity WHERE pid = {}".format(pid_a), + "AioIoCompletion", + ) + + node.safe_psql("SELECT inj_io_completion_continue()") + + psql_a.query_until(r"\{0,2,5,7\}") + psql_a.clear() + # {io_method}: read stream encounters succeeding IO by another backend + + +def _foreign_failing(node, psql_a, psql_b, pid_a): + # Test read stream encountering buffers undergoing IO in another backend, + # with the other backend's reads failing. + psql_a.query_safe("SELECT evict_rel('largeish');") + + psql_b.query_safe( + "SELECT inj_io_completion_wait(pid=>pg_backend_pid(),\n" + "\t\t relfilenode=>pg_relation_filenode('largeish'));" + ) + + psql_b.query_safe( + "SELECT inj_io_short_read_attach(-errno_from_string('EIO'),\n" + "\t\t pid=>pg_backend_pid(),\n" + "\t\t relfilenode=>pg_relation_filenode('largeish'));" + ) + + psql_b.send("SELECT read_rel_block_ll('largeish',\n\t\tblockno=>5, nblocks=>1);\n") + + _wait_completion_wait(node) + + psql_a.send( + "SELECT array_agg(blocknum) FROM\n" + "\t\tread_stream_for_blocks('largeish', ARRAY[0, 2, 5, 7]);\n" + ) + + assert node.poll_query_until( + "SELECT wait_event FROM pg_stat_activity WHERE pid = {}".format(pid_a), + "AioIoCompletion", + ) + + node.safe_psql("SELECT inj_io_completion_continue()") + + psql_a.query_until(r"\{0,2,5,7\}") + psql_a.clear() + + psql_b.wait_for_stderr(r"ERROR.*could not read blocks 5\.\.5") + # {io_method}: injected error occurred + psql_b.query_safe("SELECT inj_io_short_read_detach();") + # {io_method}: read stream encounters failing IO by another backend + + +def _foreign_two_buffers(node, psql_a, psql_b, pid_a): + # Test read stream encountering two buffers that are undergoing the same IO, + # started by another backend. 
+ psql_a.query_safe("SELECT evict_rel('largeish');") + + psql_b.query_safe( + "SELECT inj_io_completion_wait(pid=>pg_backend_pid(),\n" + "\t\t relfilenode=>pg_relation_filenode('largeish'));" + ) + + psql_b.send("SELECT read_rel_block_ll('largeish',\n\t\tblockno=>2, nblocks=>3);\n") + + _wait_completion_wait(node) + + # Blocks 2 and 4 are undergoing IO initiated by session b + psql_a.send( + "SELECT array_agg(blocknum) FROM\n" + "\t\tread_stream_for_blocks('largeish', ARRAY[0, 2, 4]);\n" + ) + + assert node.poll_query_until( + "SELECT wait_event FROM pg_stat_activity WHERE pid = {}".format(pid_a), + "AioIoCompletion", + ) + + node.safe_psql("SELECT inj_io_completion_continue()") + + psql_a.query_until(r"\{0,2,4\}") + psql_a.clear() + # {io_method}: read stream encounters two buffer read in one IO + + +def _test_inject_foreign(io_method, node): # pylint: disable=unused-argument + psql_a = node.background_psql("postgres", on_error_stop=False) + psql_b = node.background_psql("postgres", on_error_stop=False) + + pid_a = psql_a.query_safe("SELECT pg_backend_pid();") + + _foreign_succeeding(node, psql_a, psql_b, pid_a) + _foreign_failing(node, psql_a, psql_b, pid_a) + _foreign_two_buffers(node, psql_a, psql_b, pid_a) + + psql_a.quit() + psql_b.quit() + + +def _test_setup(node): + node.safe_psql( + "\n" + "CREATE EXTENSION test_aio;\n" + "\n" + "CREATE TABLE largeish(k int not null) WITH (FILLFACTOR=10);\n" + "INSERT INTO largeish(k) SELECT generate_series(1, 10000);\n" + ) + # setup + + +def _test_io_method(io_method, node): + assert ( + node.safe_psql("SHOW io_method") == io_method + ), "{}: io_method set correctly".format(io_method) + + _test_repeated_blocks(io_method, node) + + if os.environ.get("enable_injection_points") == "yes": + _test_inject_foreign(io_method, node) + + +def test_004_read_stream(create_pg): + """Drive the read-stream scenarios for each supported io_method.""" + node = create_pg("test", start=False) + + testaio.configure(node) + + 
node.append_conf("\nmax_connections=8\nio_method=worker\n") + + node.start() + _test_setup(node) + node.stop() + + for method in testaio.supported_io_methods(): + node.adjust_conf("io_method", method) + node.start() + _test_io_method(method, node) + node.stop() diff --git a/src/test/modules/test_aio/pyt/testaio.py b/src/test/modules/test_aio/pyt/testaio.py new file mode 100644 index 0000000000000..0f2621e625dd2 --- /dev/null +++ b/src/test/modules/test_aio/pyt/testaio.py @@ -0,0 +1,82 @@ +# Copyright (c) 2024-2026, PostgreSQL Global Development Group + +"""Python port of TestAio (src/test/modules/test_aio/t/TestAio.pm). + +Helpers for writing AIO-related pytest tests: enumerating the supported +io_method GUC values and applying the shared cluster configuration the test +suite expects. +""" + +import os +import re +import subprocess + +import pypg + + +def have_io_uring(): + """Return True if this build supports io_method=io_uring. + + Mirrors TestAio::have_io_uring. To detect whether io_uring is supported, we + look at the error message for assigning an invalid value to the io_method + enum GUC, which lists all the valid options. We use ``postgres -C`` so the + superuser check is omitted (matters when running as administrator on + Windows). + + As a fast path we first consult pg_config.h for ``#define USE_LIBURING 1``; + when that header marker is present the runtime probe is skipped. 
+ """ + if pypg.check_pg_config(r"#define USE_LIBURING 1"): + return True + + postgres = os.environ.get("PG_CONFIG") + if postgres: + bindir = subprocess.run( + [postgres, "--bindir"], + stdout=subprocess.PIPE, + encoding="utf-8", + check=True, + ).stdout.strip() + postgres = os.path.join(bindir, "postgres") + else: + postgres = "postgres" + + proc = subprocess.run( + [postgres, "-C", "invalid", "-c", "io_method=invalid"], + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + encoding="utf-8", + errors="replace", + check=False, + ) + match = re.search(r"Available values: ([^.]+)\.", proc.stderr) + if match is None: + raise RuntimeError("can't determine supported io_method values") + return "io_uring" in match.group(1) + + +def supported_io_methods(): + """Return the list of supported values for the io_method GUC. + + Mirrors TestAio::supported_io_methods: ``worker`` first, ``io_uring`` if the + build supports it, and ``sync`` last (it least commonly fails). + """ + io_methods = ["worker"] + if have_io_uring(): + io_methods.append("io_uring") + # Return sync last, as it will least commonly fail. + io_methods.append("sync") + return io_methods + + +def configure(node): + """Prepare a cluster for AIO tests (mirrors TestAio::configure).""" + node.append_conf( + "\n" + "shared_preload_libraries=test_aio\n" + "log_min_messages = 'DEBUG3'\n" + "log_statement=all\n" + "log_error_verbosity=default\n" + "restart_after_crash=false\n" + "temp_buffers=100\n" + ) diff --git a/src/test/modules/test_autovacuum/meson.build b/src/test/modules/test_autovacuum/meson.build index 86e392bc0de5b..babf2eae9934d 100644 --- a/src/test/modules/test_autovacuum/meson.build +++ b/src/test/modules/test_autovacuum/meson.build @@ -12,4 +12,12 @@ tests += { 't/001_parallel_autovacuum.pl', ], }, + 'pytest': { + 'env': { + 'enable_injection_points': get_option('injection_points') ? 
'yes' : 'no', + }, + 'tests': [ + 'pyt/test_001_parallel_autovacuum.py', + ], + }, } diff --git a/src/test/modules/test_autovacuum/pyt/test_001_parallel_autovacuum.py b/src/test/modules/test_autovacuum/pyt/test_001_parallel_autovacuum.py new file mode 100644 index 0000000000000..2e8e888614f55 --- /dev/null +++ b/src/test/modules/test_autovacuum/pyt/test_001_parallel_autovacuum.py @@ -0,0 +1,102 @@ +# Copyright (c) 2024-2026, PostgreSQL Global Development Group + +# pylint: disable=line-too-long +"""Port of src/test/modules/test_autovacuum/t/001_parallel_autovacuum.pl. + +Parallel autovacuum: a table configured with autovacuum_parallel_workers runs +its index vacuum phase with the expected number of parallel workers, and a +cost-parameter change made while a parallel autovacuum is paused (via an +injection point) is propagated to the already-launched parallel workers. +Requires an injection-points build. +""" + +import os + +import pytest + + +def _prepare_for_next_test(node, test_number): + node.safe_psql( + "ALTER TABLE test_autovac SET (autovacuum_enabled = false);\n" + "UPDATE test_autovac SET col_1 = {};".format(test_number) + ) + + +def test_001_parallel_autovacuum(create_pg): + """Parallel autovacuum launches workers and propagates cost-param changes.""" + if os.environ.get("enable_injection_points") != "yes": + pytest.skip("Injection points not supported by this build") + node = create_pg("main", start=False) + node.append_conf( + "\nautovacuum_max_workers = 1\nautovacuum_worker_slots = 1\n" + "autovacuum_max_parallel_workers = 2\nmax_worker_processes = 10\n" + "max_parallel_workers = 10\nlog_min_messages = debug2\n" + "autovacuum_naptime = '1s'\nmin_parallel_index_scan_size = 0\n" + "log_autovacuum_min_duration = -1\n" + ) + node.start() + if not node.check_extension("injection_points"): + pytest.skip("Extension injection_points not installed") + node.safe_psql("CREATE EXTENSION injection_points;") + indexes_num = 3 + initial_rows_num = 10_000 + 
autovacuum_parallel_workers = 2 + node.safe_psql( + "CREATE TABLE test_autovac (\n" + " id SERIAL PRIMARY KEY,\n" + " col_1 INTEGER, col_2 INTEGER, col_3 INTEGER, col_4 INTEGER\n" + ") WITH (autovacuum_parallel_workers = {},\n" + " log_autovacuum_min_duration = 0);\n" + "INSERT INTO test_autovac\n" + "SELECT g AS col1, g + 1 AS col2, g + 2 AS col3, g + 3 AS col4\n" + "FROM generate_series(1, {}) AS g;".format( + autovacuum_parallel_workers, initial_rows_num + ) + ) + node.safe_psql( + "DO $$\n" + "DECLARE\n" + " i INTEGER;\n" + "BEGIN\n" + " FOR i IN 1..{} LOOP\n" + " EXECUTE format('CREATE INDEX idx_col_%s ON test_autovac " + "(col_%s);', i, i);\n" + " END LOOP;\n" + "END $$;".format(indexes_num) + ) + _prepare_for_next_test(node, 1) + log_offset = node.current_log_position() + node.safe_psql("ALTER TABLE test_autovac SET (autovacuum_enabled = true);") + node.wait_for_log( + r"parallel workers: index vacuum: 2 planned, 2 launched in total", + log_offset, + ) + _prepare_for_next_test(node, 2) + log_offset = node.current_log_position() + node.safe_psql( + "SELECT injection_points_attach('autovacuum-start-parallel-vacuum', " + "'wait');\n" + "ALTER TABLE test_autovac SET (autovacuum_parallel_workers = 1, " + "autovacuum_enabled = true);" + ) + node.wait_for_event("autovacuum worker", "autovacuum-start-parallel-vacuum") + node.safe_psql( + "ALTER SYSTEM SET autovacuum_vacuum_cost_limit = 500;\n" + "ALTER SYSTEM SET autovacuum_vacuum_cost_delay = 5;\n" + "ALTER SYSTEM SET vacuum_cost_page_miss = 10;\n" + "ALTER SYSTEM SET vacuum_cost_page_dirty = 10;\n" + "ALTER SYSTEM SET vacuum_cost_page_hit = 10;\n" + "SELECT pg_reload_conf();" + ) + node.safe_psql( + "SELECT injection_points_wakeup('autovacuum-start-parallel-vacuum');" + ) + node.wait_for_log( + r"parallel autovacuum worker updated cost params: cost_limit=500, " + r"cost_delay=5, cost_page_miss=10, cost_page_dirty=10, cost_page_hit=10", + log_offset, + ) + node.safe_psql( + "SELECT 
injection_points_detach('autovacuum-start-parallel-vacuum');" + ) + node.stop() diff --git a/src/test/modules/test_checksums/meson.build b/src/test/modules/test_checksums/meson.build index 9b1421a9b9136..ea9bcdfcd9148 100644 --- a/src/test/modules/test_checksums/meson.build +++ b/src/test/modules/test_checksums/meson.build @@ -35,4 +35,20 @@ tests += { 't/009_fpi.pl', ], }, + 'pytest': { + 'env': { + 'enable_injection_points': get_option('injection_points') ? 'yes' : 'no', + }, + 'tests': [ + 'pyt/test_001_basic.py', + 'pyt/test_002_restarts.py', + 'pyt/test_003_standby_restarts.py', + 'pyt/test_004_offline.py', + 'pyt/test_005_injection.py', + 'pyt/test_006_pgbench_single.py', + 'pyt/test_007_pgbench_standby.py', + 'pyt/test_008_pitr.py', + 'pyt/test_009_fpi.py', + ], + }, } diff --git a/src/test/modules/test_checksums/pyt/datachecksums_utils.py b/src/test/modules/test_checksums/pyt/datachecksums_utils.py new file mode 100644 index 0000000000000..ec96d71cf29d8 --- /dev/null +++ b/src/test/modules/test_checksums/pyt/datachecksums_utils.py @@ -0,0 +1,76 @@ +# Copyright (c) 2024-2026, PostgreSQL Global Development Group + +"""Python port of DataChecksums::Utils (src/test/modules/test_checksums/t). + +Helpers for driving and observing online data-checksum enable/disable in a +running cluster, shared by the test_checksums pytest suite. 
+""" + +import random +import time + + +def test_checksum_state(node, state): + """Assert the data_checksums GUC equals state; return whether it matched.""" + result = node.safe_psql( + "SELECT setting FROM pg_catalog.pg_settings WHERE name = 'data_checksums';" + ) + assert result == state, "ensure checksums are set to {} on {}".format( + state, node.name + ) + return result == state + + +def wait_for_checksum_state(node, state): + """Poll until data_checksums reaches state; assert success; return bool.""" + res = node.poll_query_until( + "SELECT setting FROM pg_catalog.pg_settings WHERE name = 'data_checksums';", + state, + ) + assert res, "ensure data checksums are transitioned to {} on {}".format( + state, node.name + ) + return res + + +def enable_data_checksums(node, cost_delay=0, cost_limit=100, wait=None): + """Enable data checksums online, optionally waiting for the end state.""" + node.safe_psql( + "SELECT pg_enable_data_checksums({}, {});".format(cost_delay, cost_limit) + ) + if wait is not None: + wait_for_checksum_state(node, wait) + if wait in ("on", "off"): + node.poll_query_until( + "SELECT count(*) = 0 FROM pg_catalog.pg_stat_activity " + "WHERE backend_type = 'datachecksums launcher';" + ) + + +def disable_data_checksums(node, wait=None): + """Disable data checksums, optionally waiting for the off state.""" + node.safe_psql("SELECT pg_disable_data_checksums();") + if wait is not None: + wait_for_checksum_state(node, "off") + node.poll_query_until( + "SELECT count(*) = 0 FROM pg_catalog.pg_stat_activity " + "WHERE backend_type = 'datachecksums launcher';" + ) + + +def cointoss(): + """Return 0 or 1 with even probability.""" + return int(random.random() < 0.5) + + +def random_sleep(max_seconds=3): + """Sleep a random (0, max_seconds) interval about half the time.""" + if max_seconds == 0: + return + if cointoss(): + time.sleep(int(random.random() * max_seconds)) + + +def stopmode(): + """Pick a valid stop mode ('immediate' or 'fast') at random.""" + 
return "immediate" if cointoss() else "fast" diff --git a/src/test/modules/test_checksums/pyt/test_001_basic.py b/src/test/modules/test_checksums/pyt/test_001_basic.py new file mode 100644 index 0000000000000..b076b4f223063 --- /dev/null +++ b/src/test/modules/test_checksums/pyt/test_001_basic.py @@ -0,0 +1,33 @@ +# Copyright (c) 2024-2026, PostgreSQL Global Development Group + +"""Port of src/test/modules/test_checksums/t/001_basic.pl. + +Online enabling and disabling of data checksums on a single node: pages written +while checksums were off are read back correctly after checksums are enabled, +re-enabling is idempotent, and disabling then re-enabling preserves the data. +""" + +import datachecksums_utils as dcu # pyrefly: ignore + + +def test_001_basic(create_pg): + """Data checksums can be enabled/disabled online without data loss.""" + node = create_pg("basic_node", no_data_checksums=True) + node.safe_psql("CREATE TABLE t AS SELECT generate_series(1,10000) AS a;") + dcu.test_checksum_state(node, "off") + dcu.enable_data_checksums(node, wait="on") + result = node.safe_psql("SELECT count(*) FROM t WHERE a > 1 ") + assert result == "9999", "ensure checksummed pages can be read back" + dcu.enable_data_checksums(node) + dcu.test_checksum_state(node, "on") + node.safe_psql("UPDATE t SET a = a + 1;") + result = node.safe_psql("SELECT count(*) FROM t WHERE a > 1") + assert result == "10000", "ensure checksummed pages can be read back" + dcu.disable_data_checksums(node, wait=1) + result = node.safe_psql("SELECT count(*) FROM t WHERE a > 1") + assert result == "10000", "ensure previously checksummed pages can be read back" + node.safe_psql("UPDATE t SET a = a + 1;") + dcu.enable_data_checksums(node, wait="on") + result = node.safe_psql("SELECT count(*) FROM t WHERE a > 1") + assert result == "10000", "ensure checksummed pages can be read back" + node.stop() diff --git a/src/test/modules/test_checksums/pyt/test_002_restarts.py 
b/src/test/modules/test_checksums/pyt/test_002_restarts.py new file mode 100644 index 0000000000000..4489793e14ff7 --- /dev/null +++ b/src/test/modules/test_checksums/pyt/test_002_restarts.py @@ -0,0 +1,63 @@ +# Copyright (c) 2024-2026, PostgreSQL Global Development Group + +"""Port of src/test/modules/test_checksums/t/002_restarts.pl. + +Online checksum enabling blocked by an open temporary table stays in the +inprogress-on state and does not persist across a restart; once the blocker is +gone, enabling completes and the worker/launcher terminate. The blocked-table +scenario runs only with checksum_extended in PG_TEST_EXTRA (it relies on the +worker's retry timing); the completion path always runs. +""" + +import os +import time + +import datachecksums_utils as dcu # pyrefly: ignore + + +def _checksum_extended(): + extra = os.environ.get("PG_TEST_EXTRA", "") + return "checksum_extended" in extra.split() + + +def test_002_restarts(create_pg): + """Checksum enabling blocks on open temp tables and survives restart off.""" + node = create_pg("restarts_node", no_data_checksums=True) + node.safe_psql("CREATE TABLE t AS SELECT generate_series(1,10000) AS a;") + dcu.test_checksum_state(node, "off") + if _checksum_extended(): + bsession = node.background_psql("postgres") + bsession.query_safe("CREATE TEMPORARY TABLE tt (a integer);") + result = node.safe_psql( + "SELECT relpersistence FROM pg_catalog.pg_class WHERE relname = 'tt';" + ) + assert result == "t", "ensure we can see the temporary table" + dcu.enable_data_checksums(node, wait="inprogress-on") + node.poll_query_until( + "SELECT wait_event FROM pg_catalog.pg_stat_activity " + "WHERE backend_type = 'datachecksums worker';", + "ChecksumEnableTemptableWait", + ) + time.sleep(4) + result = node.safe_psql( + "SELECT wait_event FROM pg_catalog.pg_stat_activity " + "WHERE backend_type = 'datachecksums worker';" + ) + assert ( + result == "ChecksumEnableTemptableWait" + ), "ensure the correct wait condition is set" + 
dcu.test_checksum_state(node, "inprogress-on") + node.stop() + bsession.quit() + node.start() + dcu.test_checksum_state(node, "off") + dcu.enable_data_checksums(node, wait="on") + result = node.safe_psql("SELECT count(*) FROM t WHERE a > 1") + assert result == "9999", "ensure checksummed pages can be read back" + assert node.poll_query_until( + "SELECT count(*) FROM pg_stat_activity " + "WHERE backend_type LIKE 'datachecksums%';", + "0", + ), "await datachecksums worker/launcher termination" + dcu.disable_data_checksums(node, wait=1) + node.stop() diff --git a/src/test/modules/test_checksums/pyt/test_003_standby_restarts.py b/src/test/modules/test_checksums/pyt/test_003_standby_restarts.py new file mode 100644 index 0000000000000..d2c7c15eef7a2 --- /dev/null +++ b/src/test/modules/test_checksums/pyt/test_003_standby_restarts.py @@ -0,0 +1,185 @@ +# Copyright (c) 2024-2026, PostgreSQL Global Development Group + +"""Port of src/test/modules/test_checksums/t/003_standby_restarts.pl. + +Online checksum changes propagate correctly to a streaming standby: enabling on +the primary moves the standby through inprogress-on to on, disabling moves it +back, unlogged relations behave correctly across the transition and a +promotion, and a promotion while still inprogress-on leaves the new primary with +checksums off. No page-verification errors on either node. 
+""" + +import os +import re + +import pypg + +import datachecksums_utils as dcu # pyrefly: ignore + +_NO_CSUM_ERR = r"page verification failed,.+\d$" + + +def _read_back(node, expected, msg): + assert node.safe_psql("SELECT count(a) FROM t WHERE a > 1") == expected, msg + + +def test_003_standby_restarts(create_pg): + """Checksum enable/disable propagates to a streaming standby correctly.""" + primary = create_pg( + "standby_restarts_primary", allows_streaming=True, no_data_checksums=True + ) + slotname = "physical_slot" + primary.safe_psql( + "SELECT pg_create_physical_replication_slot('{}')".format(slotname) + ) + backup_name = "my_backup" + primary.backup(backup_name) + standby = create_pg( + "standby_restarts_standby", + from_backup=(primary, backup_name), + has_streaming=True, + start=False, + ) + standby.append_conf("\nprimary_slot_name = '{}'\n".format(slotname)) + standby.start() + primary.safe_psql("CREATE TABLE t AS SELECT generate_series(1,10000) AS a;") + primary.wait_for_catchup(standby, "replay", primary.lsn("insert")) + dcu.test_checksum_state(primary, "off") + dcu.test_checksum_state(standby, "off") + dcu.enable_data_checksums(primary) + assert primary.poll_query_until( + "SELECT setting = 'off' FROM pg_catalog.pg_settings " + "WHERE name = 'data_checksums';", + "f", + ), "ensure primary has transitioned from off" + primary.wait_for_catchup(standby, "replay") + assert standby.poll_query_until( + "SELECT setting = 'off' FROM pg_catalog.pg_settings " + "WHERE name = 'data_checksums';", + "f", + ), "ensure standby has absorbed the inprogress-on barrier" + state = standby.safe_psql( + "SELECT setting FROM pg_catalog.pg_settings WHERE name = 'data_checksums';" + ) + assert state in ( + "inprogress-on", + "on", + ), "ensure checksums are on, or in progress, on standby_1" + primary.safe_psql("INSERT INTO t VALUES (generate_series(1, 10000));") + dcu.wait_for_checksum_state(primary, "on") + dcu.wait_for_checksum_state(standby, "on") + _read_back(primary, 
"19998", "ensure we can safely read all data with checksums") + assert primary.poll_query_until( + "SELECT count(*) FROM pg_stat_activity " + "WHERE backend_type LIKE 'datachecksums%';", + "0", + ), "await datachecksums worker/launcher termination" + dcu.disable_data_checksums(primary) + primary.wait_for_catchup(standby, "replay") + dcu.wait_for_checksum_state(primary, "off") + dcu.wait_for_checksum_state(standby, "off") + _read_back(primary, "19998", "ensure we can safely read all data without checksums") + _unlogged_checks(primary, standby) + _promote_inprogress(create_pg, primary, slotname) + + +def _unlogged_checks(primary, standby): + """Unlogged relations behave correctly across a checksum enable + promote.""" + primary.safe_psql( + "CREATE UNLOGGED TABLE unlogged_tbl AS SELECT generate_series(1,1000) AS a;" + ) + primary.safe_psql( + "CREATE UNLOGGED TABLE unlogged_promo (id int PRIMARY KEY, payload text);\n" + "INSERT INTO unlogged_promo SELECT g, repeat('x', 100) " + "FROM generate_series(1, 1000) g;\n" + "CREATE INDEX unlogged_promo_payload_idx ON unlogged_promo (payload);" + ) + primary.wait_for_catchup(standby, "replay", primary.lsn("insert")) + unlogged_rfn = primary.safe_psql( + "SELECT relfilenode FROM pg_class WHERE relname = 'unlogged_tbl';" + ) + db_oid = primary.safe_psql( + "SELECT oid FROM pg_database WHERE datname = 'postgres';" + ) + main_fork = "{}/base/{}/{}".format(standby.datadir, db_oid, unlogged_rfn) + assert not os.path.isfile( + main_fork + ), "standby has no main fork for unlogged table before enable" + dcu.enable_data_checksums(primary, wait="on") + dcu.wait_for_checksum_state(standby, "on") + primary.wait_for_catchup(standby, "replay", primary.lsn("insert")) + assert not os.path.isfile( + main_fork + ), "standby has no main fork for unlogged table after enable" + assert ( + standby.safe_psql("SELECT pg_relation_size('unlogged_tbl', 'main');") == "0" + ), "unlogged table has zero size on standby after checksum enable" + assert ( + 
primary.safe_psql("SELECT count(*) FROM unlogged_tbl;") == "1000" + ), "unlogged table readable on primary after checksum enable" + primary.safe_psql("ALTER TABLE unlogged_tbl SET logged;") + primary.wait_for_catchup(standby, "replay", primary.lsn("insert")) + assert ( + primary.safe_psql("SELECT sum(a) FROM unlogged_tbl;") == "500500" + ), "previously unlogged table can be read on primary" + assert ( + standby.safe_psql("SELECT sum(a) FROM unlogged_tbl;") == "500500" + ), "previously unlogged table can be read on standby" + primary.stop() + standby.promote() + assert ( + standby.safe_psql("SELECT count(*) FROM unlogged_promo;") == "0" + ), "unlogged table readable on promoted standby (truncated as expected)" + standby.safe_psql( + "INSERT INTO unlogged_promo SELECT g, repeat('y',100) " + "FROM generate_series(1,100) g;" + ) + assert ( + standby.safe_psql( + "SET enable_seqscan = off; SELECT id FROM unlogged_promo WHERE id = 50;" + ) + == "50" + ), "indexed lookup on promoted standby returns expected row" + standby.stop() + _assert_clean_log(primary, "primary") + _assert_clean_log(standby, "standby") + standby.clean_node() + primary.start() + + +def _promote_inprogress(create_pg, primary, slotname): + """Promotion while still inprogress-on leaves the new primary with off.""" + dcu.disable_data_checksums(primary, wait="off") + backup_name = "my_new_backup" + primary.backup(backup_name) + standby = create_pg( + "standby_restarts_standby2", + from_backup=(primary, backup_name), + has_streaming=True, + start=False, + ) + standby.append_conf("\nprimary_slot_name = '{}'\n".format(slotname)) + standby.start() + primary.wait_for_catchup(standby, "replay") + primary_bpsql = primary.background_psql("postgres") + primary_bpsql.query_safe("CREATE TEMPORARY TABLE tt (a integer);") + standby_bpsql = standby.background_psql("postgres") + dcu.enable_data_checksums(primary, wait="inprogress-on") + primary.wait_for_catchup(standby, "replay") + dcu.test_checksum_state(standby, 
"inprogress-on") + primary.teardown_node() + standby.promote() + dcu.wait_for_checksum_state(standby, "off") + assert ( + standby_bpsql.query_safe("SHOW data_checksums;").strip() == "off" + ), "ensure checksums are set to off after promotion during inprogress-on" + standby_bpsql.quit() + primary_bpsql.quit() + standby.stop() + + +def _assert_clean_log(node, label): + log = pypg.slurp_file(node.log, 0) + assert not re.search( + _NO_CSUM_ERR, log, re.MULTILINE + ), "no checksum validation errors in {} log".format(label) diff --git a/src/test/modules/test_checksums/pyt/test_004_offline.py b/src/test/modules/test_checksums/pyt/test_004_offline.py new file mode 100644 index 0000000000000..fec7679589c5b --- /dev/null +++ b/src/test/modules/test_checksums/pyt/test_004_offline.py @@ -0,0 +1,42 @@ +# Copyright (c) 2024-2026, PostgreSQL Global Development Group + +"""Port of src/test/modules/test_checksums/t/004_offline.pl. + +Offline checksum enable/disable via pg_checksums interoperates with the online +state machine: pages are read back correctly after offline enabling, and an +online enable left in the inprogress-on state can be completed offline. 
+""" + +import datachecksums_utils as dcu # pyrefly: ignore + + +def test_004_offline(create_pg): + """Offline pg_checksums enable/disable round-trips with online state.""" + node = create_pg("offline_node", no_data_checksums=True) + node.safe_psql("CREATE TABLE t AS SELECT generate_series(1,10000) AS a;") + dcu.test_checksum_state(node, "off") + node.stop() + node.checksum_enable_offline() + node.start() + dcu.test_checksum_state(node, "on") + result = node.safe_psql("SELECT count(*) FROM t WHERE a > 1") + assert result == "9999", "ensure checksummed pages can be read back" + node.stop() + node.checksum_disable_offline() + node.start() + dcu.test_checksum_state(node, "off") + bsession = node.background_psql("postgres") + bsession.query_safe("CREATE TEMPORARY TABLE tt (a integer);") + result = node.safe_psql( + "SELECT relpersistence FROM pg_catalog.pg_class WHERE relname = 'tt';" + ) + assert result == "t", "ensure we can see the temporary table" + dcu.enable_data_checksums(node, wait="inprogress-on") + node.stop("fast") + bsession.quit() + node.checksum_enable_offline() + node.start() + dcu.test_checksum_state(node, "on") + result = node.safe_psql("SELECT count(*) FROM t WHERE a > 1") + assert result == "9999", "ensure checksummed pages can be read back" + node.stop() diff --git a/src/test/modules/test_checksums/pyt/test_005_injection.py b/src/test/modules/test_checksums/pyt/test_005_injection.py new file mode 100644 index 0000000000000..2ee49bf915a7f --- /dev/null +++ b/src/test/modules/test_checksums/pyt/test_005_injection.py @@ -0,0 +1,49 @@ +# Copyright (c) 2024-2026, PostgreSQL Global Development Group + +"""Port of src/test/modules/test_checksums/t/005_injection.pl. + +Uses injection points to exercise the data-checksum worker's error and retry +paths synthetically: a forced per-database failure aborts enabling, and (with +checksum_extended) an injected barrier delay and a faked temporary-table wait +drive the worker's retry loop. 
Requires an injection-points build. +""" + +import os + +import pytest + +import datachecksums_utils as dcu # pyrefly: ignore + + +def _checksum_extended(): + return "checksum_extended" in os.environ.get("PG_TEST_EXTRA", "").split() + + +def test_005_injection(create_pg): + """Injection points drive the checksum worker's failure and retry paths.""" + if os.environ.get("enable_injection_points") != "yes": + pytest.skip("Injection points not supported by this build") + node = create_pg("injection_node", no_data_checksums=True) + node.safe_psql("CREATE EXTENSION test_checksums;") + node.safe_psql("CREATE EXTENSION injection_points;") + dcu.disable_data_checksums(node, wait=1) + node.safe_psql( + "SELECT injection_points_attach('datachecksumsworker-fail-db-result','notice');" + ) + dcu.enable_data_checksums(node, wait="off") + node.safe_psql( + "SELECT injection_points_detach('datachecksumsworker-fail-db-result');" + ) + dcu.disable_data_checksums(node) + dcu.test_checksum_state(node, "off") + if _checksum_extended(): + dcu.disable_data_checksums(node, wait=1) + node.safe_psql("SELECT dcw_inject_delay_barrier();") + dcu.enable_data_checksums(node, wait="on") + dcu.disable_data_checksums(node, wait=1) + node.safe_psql( + "SELECT injection_points_attach(" + "'datachecksumsworker-fake-temptable-wait', 'notice');" + ) + dcu.enable_data_checksums(node, wait="on") + node.stop() diff --git a/src/test/modules/test_checksums/pyt/test_006_pgbench_single.py b/src/test/modules/test_checksums/pyt/test_006_pgbench_single.py new file mode 100644 index 0000000000000..6fe39e7f0d65d --- /dev/null +++ b/src/test/modules/test_checksums/pyt/test_006_pgbench_single.py @@ -0,0 +1,163 @@ +# Copyright (c) 2024-2026, PostgreSQL Global Development Group + +"""Port of src/test/modules/test_checksums/t/006_pgbench_single.pl. 
+ +Stress single-node online checksum flipping under concurrent pgbench load with +random crash/restart cycles, verifying after each that all rows read back and +the server log never reports a page-verification (checksum) failure -- including +during WAL recovery. Gated on PG_TEST_EXTRA checksum/checksum_extended and an +injection-points build. +""" + +import os +import random +import re +import subprocess + +import pytest + +import pypg + +import datachecksums_utils as dcu # pyrefly: ignore + +_NO_CSUM_ERR = r"page verification failed,.+\d$" + + +def _extended(): + return "checksum_extended" in os.environ.get("PG_TEST_EXTRA", "").split() + + +def _start_bg_pgbench(node): + """Start a fire-and-forget pgbench load against node; return the Popen.""" + extended = _extended() + clients = 1 + random.randrange(15) if extended else 1 + runtime = 600 if extended else 2 + cmd = [ + "pgbench", + "-h", + str(node.host), + "-p", + str(node.port), + "-T", + str(runtime), + "-c", + str(clients), + ] + if extended and dcu.cointoss(): + cmd.append("-C") + cmd.append("postgres") + return subprocess.Popen( # pylint: disable=consider-using-with + cmd, + stdin=subprocess.DEVNULL, + stdout=subprocess.DEVNULL, + stderr=subprocess.DEVNULL, + ) + + +def _flip_data_checksums(node, state): + """Flip checksums to the opposite of state (off<->on); return new state.""" + dcu.test_checksum_state(node, state) + if state == "off": + temptablewait = dcu.cointoss() + if temptablewait: + node.safe_psql( + "SELECT injection_points_attach(" + "'datachecksumsworker-fake-temptable-wait', 'notice');" + ) + dcu.enable_data_checksums(node, wait="inprogress-on") + dcu.wait_for_checksum_state(node, "on") + if temptablewait: + node.safe_psql( + "SELECT injection_points_detach(" + "'datachecksumsworker-fake-temptable-wait');" + ) + return "on" + dcu.disable_data_checksums(node) + dcu.wait_for_checksum_state(node, "off") + return "off" + + +def test_006_pgbench_single(create_pg): + """Online checksum flips 
survive crash/restart under load, no csum errors.""" + tokens = os.environ.get("PG_TEST_EXTRA", "").split() + if "checksum" not in tokens and "checksum_extended" not in tokens: + pytest.skip("Expensive data checksums test disabled") + if os.environ.get("enable_injection_points") != "yes": + pytest.skip("Injection points not supported by this build") + extended = _extended() + iterations = 10 if extended else 1 + state = "off" + node = create_pg( + "pgbench_single_main", + allows_streaming=True, + no_data_checksums=True, + start=False, + ) + node.append_conf("\nmax_connections = 100\nlog_statement = none\n") + node.start() + node.safe_psql("CREATE EXTENSION test_checksums;") + node.safe_psql("CREATE EXTENSION injection_points;") + node.safe_psql("CREATE TABLE t AS SELECT generate_series(1, 100000) AS a;") + scalefactor = 10 if extended else 1 + node.command_ok( + [ + "pgbench", + "-p", + str(node.port), + "-i", + "-s", + str(scalefactor), + "-q", + "postgres", + ] + ) + loglocation = 0 + pgbench = _start_bg_pgbench(node) + try: + for _ in range(iterations): + if not node.is_alive(): + node.start() + node.stop("fast") + log = pypg.slurp_file(node.log, loglocation) + _assert_no_csum_errors(log, "during WAL recovery") + loglocation = node.current_log_position() + node.append_conf( + "max_wal_size = {}".format(64 + random.randrange(1024)) + ) + node.start() + pgbench = _restart_bg(pgbench, node) + node.safe_psql("UPDATE t SET a = a + 1;") + state = _flip_data_checksums(node, state) + result = node.safe_psql("SELECT count(*) FROM t WHERE a > 1") + assert result == "100000", "ensure data pages can be read back on primary" + if dcu.cointoss(): + node.stop(dcu.stopmode()) + node.bin.run_command(["pg_controldata", str(node.datadir)]) + log = pypg.slurp_file(node.log, loglocation) + _assert_no_csum_errors(log, "outside WAL recovery") + loglocation = node.current_log_position() + finally: + pgbench.terminate() + pgbench.wait() + if not node.is_alive(): + node.start() + result 
= node.safe_psql("SELECT count(*) FROM t WHERE a > 1") + assert result == "100000", "ensure data pages can be read back on primary" + dcu.test_checksum_state(node, state) + log = pypg.slurp_file(node.log, loglocation) + _assert_no_csum_errors(log, "") + node.stop() + + +def _restart_bg(pgbench, node): + """Stop the previous background pgbench and start a fresh one.""" + pgbench.terminate() + pgbench.wait() + return _start_bg_pgbench(node) + + +def _assert_no_csum_errors(log, where): + suffix = " ({})".format(where) if where else "" + assert not re.search(_NO_CSUM_ERR, log, re.MULTILINE), ( + "no checksum validation errors in primary log" + suffix + ) diff --git a/src/test/modules/test_checksums/pyt/test_007_pgbench_standby.py b/src/test/modules/test_checksums/pyt/test_007_pgbench_standby.py new file mode 100644 index 0000000000000..d2075d41b08d1 --- /dev/null +++ b/src/test/modules/test_checksums/pyt/test_007_pgbench_standby.py @@ -0,0 +1,186 @@ +# Copyright (c) 2024-2026, PostgreSQL Global Development Group + +"""Port of src/test/modules/test_checksums/t/007_pgbench_standby.pl. + +Stress online checksum flipping on a primary with a streaming standby, both +under concurrent pgbench load (read/write on the primary, read-only on the +standby) and random crash/restart cycles. After each flip the standby must move +through inprogress-on to on (and back), all rows read back, and neither node's +log reports a page-verification failure. Gated on PG_TEST_EXTRA checksum/ +checksum_extended and an injection-points build. 
import os
import random
import re
import subprocess

import pytest

import pypg

import datachecksums_utils as dcu  # pyrefly: ignore

# Server log pattern that signals a failed page checksum verification.
_NO_CSUM_ERR = r"page verification failed,.+\d$"


def _extended():
    """Tell whether PG_TEST_EXTRA asks for the extended (long) variant."""
    requested = os.environ.get("PG_TEST_EXTRA", "").split()
    return "checksum_extended" in requested


def _bg_pgbench(node, standby):
    """Launch pgbench against *node* in the background and return the Popen.

    In the extended variant a random 1-15 clients run for 600 seconds and
    ``-C`` is added on a coin toss; otherwise one client runs for 5 seconds.
    A true *standby* adds ``-S -n``. All standard streams go to DEVNULL.
    """
    stress = _extended()
    if stress:
        n_clients = 1 + random.randrange(15)
        duration = 600
    else:
        n_clients = 1
        duration = 5

    argv = ["pgbench", "-h", str(node.host), "-p", str(node.port)]
    argv.extend(["-T", str(duration), "-c", str(n_clients)])
    if stress and dcu.cointoss():
        argv.append("-C")
    if standby:
        argv.extend(["-S", "-n"])
    argv.append("postgres")

    sink = subprocess.DEVNULL
    return subprocess.Popen(  # pylint: disable=consider-using-with
        argv, stdin=sink, stdout=sink, stderr=sink
    )


def _assert_clean(node, location, where):
    """Assert *node*'s log, read from offset *location*, has no checksum errors.

    *where* names the node in the failure message.
    """
    contents = pypg.slurp_file(node.log, location)
    hit = re.search(_NO_CSUM_ERR, contents, re.MULTILINE)
    recovery_note = " (during WAL recovery)" if where.endswith("rec") else ""
    assert not hit, "no checksum validation errors in {} log{}".format(
        where, recovery_note
    )


def _flip(primary, standby, state):
    """Toggle checksums on the primary, check the standby follows, return the new state."""
    dcu.test_checksum_state(primary, state)
    dcu.test_checksum_state(standby, state)

    # Currently enabled: switch off and wait for both nodes to report "off".
    if state != "off":
        dcu.disable_data_checksums(primary)
        primary.wait_for_catchup(standby, "replay")
        dcu.wait_for_checksum_state(primary, "off")
        dcu.wait_for_checksum_state(standby, "off")
        return "off"

    # Currently disabled: switch on, optionally with the fake temp-table wait
    # injection point attached for the duration of the transition.
    fake_temptable = dcu.cointoss()
    if fake_temptable:
        primary.safe_psql(
            "SELECT injection_points_attach("
            "'datachecksumsworker-fake-temptable-wait', 'notice');"
        )
    dcu.enable_data_checksums(primary, wait="inprogress-on")
    primary.wait_for_catchup(standby, "replay")
    left_off = standby.poll_query_until(
        "SELECT setting = 'off' FROM pg_catalog.pg_settings "
        "WHERE name = 'data_checksums';",
        "f",
    )
    assert left_off, "ensure standby has absorbed the inprogress-on barrier"
    standby_setting = standby.safe_psql(
        "SELECT setting FROM pg_catalog.pg_settings "
        "WHERE name = 'data_checksums';"
    )
    assert standby_setting in ("inprogress-on", "on"), (
        "ensure checksums are on, or in progress, on standby_1, got: "
        + standby_setting
    )
    dcu.wait_for_checksum_state(primary, "on")
    dcu.wait_for_checksum_state(standby, "on")
    if fake_temptable:
        primary.safe_psql(
            "SELECT injection_points_detach("
            "'datachecksumsworker-fake-temptable-wait');"
        )
    return "on"


def test_007_pgbench_standby(create_pg):
    """Flip checksums under pgbench load on a primary/standby pair.

    Skipped unless PG_TEST_EXTRA contains checksum or checksum_extended and
    enable_injection_points is "yes". After every flip all rows must read
    back on the primary; at the end both logs must be free of checksum errors.
    """
    extra = os.environ.get("PG_TEST_EXTRA", "").split()
    if not ("checksum" in extra or "checksum_extended" in extra):
        pytest.skip("Expensive data checksums test disabled")
    if os.environ.get("enable_injection_points") != "yes":
        pytest.skip("Injection points not supported by this build")

    stress = _extended()
    rounds = 5 if stress else 1
    slot = "physical_slot"
    current = "off"
    count_sql = "SELECT count(*) FROM t WHERE a > 1"
    readback_msg = "ensure data pages can be read back on primary"

    primary = create_pg(
        "pgbench_standby_main",
        allows_streaming=True,
        no_data_checksums=True,
        start=False,
    )
    primary.append_conf(
        "\nmax_connections = 30\nlog_statement = none\nhot_standby_feedback = on\n"
    )
    primary.start()
    for setup_sql in (
        "CREATE EXTENSION test_checksums;",
        "CREATE EXTENSION injection_points;",
        "CREATE TABLE t AS SELECT generate_series(1, 100000) AS a;",
        "SELECT pg_create_physical_replication_slot('{}');".format(slot),
    ):
        primary.safe_psql(setup_sql)

    backup = "primary_backup"
    primary.backup(backup)
    standby = create_pg(
        "pgbench_standby_standby",
        from_backup=(primary, backup),
        has_streaming=True,
        start=False,
    )
    standby.append_conf("\nprimary_slot_name = '{}'\n".format(slot))
    standby.start()

    scale = 10 if stress else 1
    init_cmd = ["pgbench", "-p", str(primary.port), "-i", "-s", str(scale)]
    init_cmd += ["-q", "postgres"]
    primary.command_ok(init_cmd)
    primary.wait_for_catchup(standby, "replay")

    # Read-only load on the standby first, then read/write load on the primary.
    loads = [_bg_pgbench(standby, True), _bg_pgbench(primary, False)]
    try:
        for _round in range(rounds):
            primary.safe_psql("UPDATE t SET a = a + 1;")
            primary.wait_for_catchup(standby, "write")
            current = _flip(primary, standby, current)
            visible = primary.safe_psql(count_sql)
            assert visible == "100000", readback_msg
            dcu.random_sleep()
    finally:
        for load in loads:
            load.terminate()
            load.wait()

    for cluster in (primary, standby):
        if not cluster.is_alive():
            cluster.start()
    visible = primary.safe_psql(count_sql)
    assert visible == "100000", readback_msg
    dcu.test_checksum_state(primary, current)
    dcu.test_checksum_state(standby, current)
    _assert_clean(primary, 0, "primary")
    _assert_clean(standby, 0, "standby_1")
    standby.teardown_node()
    primary.teardown_node()
import os
import random
import re
import subprocess

import pytest

import pypg

import datachecksums_utils as dcu  # pyrefly: ignore


def _extra_tokens():
    """Split PG_TEST_EXTRA into its whitespace-separated tokens."""
    raw = os.environ.get("PG_TEST_EXTRA", "")
    return raw.split()


def _flip_data_checksums(node, state):
    """Toggle checksums on *node*; return (lsn_before, lsn_after, new_state).

    *state* is the state the node is expected to be in on entry.
    """
    dcu.test_checksum_state(node, state)
    before = node.safe_psql("SELECT pg_current_wal_lsn()")
    turning_on = state == "off"
    if turning_on:
        dcu.enable_data_checksums(node, wait="on")
    else:
        dcu.disable_data_checksums(node, wait=1)
    after = node.safe_psql("SELECT pg_current_wal_lsn()")
    return before, after, ("on" if turning_on else "off")


def _start_bg_pgbench(node, extended):
    """Launch a read/write pgbench against *node* and return its Popen.

    With *extended* a random 1-15 clients run for 600 seconds and ``-C`` is
    added on a coin toss; otherwise one client runs for 5 seconds.
    """
    if extended:
        n_clients = 1 + random.randrange(15)
        duration = 600
    else:
        n_clients = 1
        duration = 5

    argv = ["pgbench", "-h", str(node.host), "-p", str(node.port)]
    argv.extend(["-T", str(duration), "-c", str(n_clients)])
    if extended and dcu.cointoss():
        argv.append("-C")
    argv.append("postgres")

    sink = subprocess.DEVNULL
    return subprocess.Popen(  # pylint: disable=consider-using-with
        argv, stdin=sink, stdout=sink, stderr=sink
    )


def test_008_pitr(create_pg):
    """Recover to the LSN captured right after a checksum flip and verify it.

    Skipped unless PG_TEST_EXTRA contains checksum or checksum_extended.
    The recovered node must report the post-flip checksum state, return the
    expected row count, and log no page-verification failures.
    """
    extra = _extra_tokens()
    if not ("checksum" in extra or "checksum_extended" in extra):
        pytest.skip("Expensive data checksums test disabled")
    stress = "checksum_extended" in extra

    primary = create_pg(
        "pitr_main",
        has_archiving=True,
        allows_streaming=True,
        no_data_checksums=True,
        start=False,
    )
    limit = pypg.test_timeout_default()
    primary.append_conf(
        "\nmax_connections = 100\nlog_statement = none\n"
        "wal_sender_timeout = {t}s\nwal_receiver_timeout = {t}s\n".format(t=limit)
    )
    primary.start()
    primary.safe_psql("CREATE TABLE t AS SELECT generate_series(1, 100000) AS a;")

    scale = 10 if stress else 1
    init_cmd = ["pgbench", "-p", str(primary.port), "-i", "-s", str(scale)]
    init_cmd += ["-q", "postgres"]
    primary.command_ok(init_cmd)

    load = _start_bg_pgbench(primary, stress)
    try:
        backup = "my_backup"
        primary.backup(backup)
        # Only the LSN taken after the flip is used as the recovery target.
        _unused_lsn, target_lsn, expected_state = _flip_data_checksums(
            primary, "off"
        )
        for sql in (
            "UPDATE t SET a = a + 1;",
            "SELECT pg_create_restore_point('a');",
            "UPDATE t SET a = a + 1;",
        ):
            primary.safe_psql(sql)
        primary.stop("fast")
    finally:
        load.terminate()
        load.wait()

    replica = create_pg(
        "pitr_backup",
        from_backup=(primary, backup),
        standby=False,
        has_restoring=True,
        start=False,
    )
    replica.append_conf(
        "\nrecovery_target_lsn = '{}'\n"
        "recovery_target_action = 'promote'\n"
        "recovery_target_inclusive = on\n".format(target_lsn)
    )
    replica.start()
    promoted = replica.poll_query_until("SELECT pg_is_in_recovery() = 'f';")
    assert promoted, "Timed out while waiting for PITR promotion"

    dcu.test_checksum_state(replica, expected_state)
    visible = replica.safe_psql("SELECT count(*) FROM t WHERE a > 1")
    assert visible == "99999", "ensure data pages can be read back on primary"
    replica.stop()

    contents = pypg.slurp_file(replica.log, 0)
    hit = re.search(r"page verification failed,.+\d$", contents, re.MULTILINE)
    assert not hit, "no checksum validation errors in pitr log"
import re

import pypg

import datachecksums_utils as dcu  # pyrefly: ignore


def test_009_fpi(create_pg):
    """No checksum validation errors across full_page_writes toggling.

    Enables and disables checksums, updates the table with full_page_writes
    off and then on again (restarting in between), re-enables checksums and
    finally checks that every remaining row reads back and that the server
    log contains no page-verification failure.
    """
    # Create the cluster stopped so the settings appended below are already
    # in place for the very first startup; max_connections in particular is
    # only read at server start. The other tests in this directory follow
    # the same create(start=False) / append_conf / start sequence.
    node = create_pg(
        "fpi_node",
        allows_streaming=True,
        no_data_checksums=True,
        start=False,
    )
    node.append_conf("\nmax_connections = 100\nlog_statement = none\n")
    node.start()

    node.safe_psql("CREATE EXTENSION test_checksums;")
    node.safe_psql("CREATE TABLE t AS SELECT generate_series(1, 1000000) AS a;")

    # First cycle: checksums on, touch every row, checksums off again.
    dcu.enable_data_checksums(node, wait="on")
    node.safe_psql("UPDATE t SET a = a + 1;")
    dcu.disable_data_checksums(node, wait=1)

    # Modify data while full-page writes are disabled.
    node.append_conf("full_page_writes = off")
    node.restart()
    dcu.test_checksum_state(node, "off")
    node.safe_psql("UPDATE t SET a = a + 1;")
    node.safe_psql("DELETE FROM t WHERE a < 10000;")

    # Turn full-page writes back on and enable checksums once more.
    node.adjust_conf("full_page_writes", "on")
    node.restart()
    dcu.test_checksum_state(node, "off")
    dcu.enable_data_checksums(node, wait="on")

    # Values are 3..1000002 after the two updates; the DELETE removed
    # 3..9999 (9997 rows), leaving 990003.
    result = node.safe_psql("SELECT count(*) FROM t;")
    assert result == "990003", "Reading back all data from table t"

    node.stop()
    log = pypg.slurp_file(node.log, 0)
    assert not re.search(
        r"page verification failed,.+\d$", log, re.MULTILINE
    ), "no checksum validation errors in server log"
import platform
import re

import pytest


def test_001_cloexec(pg_bin):
    """Run test_cloexec on Windows and require its success banner.

    Skips on every other platform.
    """
    on_windows = platform.system() == "Windows"
    if not on_windows:
        pytest.skip("test is Windows-specific")

    outcome = pg_bin.result(["test_cloexec"])
    verified = outcome.rc == 0
    if verified:
        # Only look at the output once the exit status is known to be clean.
        banner = re.search(
            r"SUCCESS.*O_CLOEXEC behavior verified", outcome.stdout, re.DOTALL
        )
        verified = bool(banner)
    assert verified, "O_CLOEXEC prevents handle inheritance"
def test_001_basic(create_pg):
    """Register a custom WAL resource manager, write a record, read it back.

    The rmgr is preloaded through shared_preload_libraries; the record it
    writes is looked up with pg_walinspect between the LSN of a freshly
    created physical slot and the current flush LSN.
    """
    node = create_pg("main", start=False)
    node.append_conf(
        "\nwal_level = 'replica'\nmax_wal_senders = 4\nshared_preload_libraries = 'test_custom_rmgrs'\n"
    )
    node.start()
    for extension_sql in (
        "CREATE EXTENSION test_custom_rmgrs",
        "CREATE EXTENSION pg_walinspect",
    ):
        node.safe_psql(extension_sql)

    lsn_from = node.safe_psql(
        "SELECT lsn FROM pg_create_physical_replication_slot('regress_test_slot1', true, false);"
    )
    record_lsn = node.safe_psql(
        "SELECT * FROM test_custom_rmgrs_insert_wal_record('payload123')"
    )
    node.safe_psql("SELECT pg_switch_wal()")
    lsn_to = node.safe_psql("SELECT pg_current_wal_flush_lsn()")

    registered = node.safe_psql(
        "SELECT count(*) FROM pg_get_wal_resource_managers()\n\t\tWHERE rm_name = 'test_custom_rmgrs';"
    )
    assert (
        registered == "1"
    ), "custom WAL resource manager has successfully registered with the server"

    wanted = "".join(
        (
            str(record_lsn),
            "|test_custom_rmgrs|TEST_CUSTOM_RMGRS_MESSAGE|0|payload (10 bytes): payload123",
        )
    )
    lookup_sql = "".join(
        (
            "SELECT end_lsn, resource_manager, record_type, fpi_length, description FROM pg_get_wal_records_info('",
            str(lsn_from),
            "', '",
            str(lsn_to),
            "')\n\t\tWHERE resource_manager = 'test_custom_rmgrs';",
        )
    )
    found = node.safe_psql(lookup_sql)
    assert (
        found == wanted
    ), "custom WAL resource manager has successfully written a WAL record"
    node.stop()
def test_001_custom_stats(create_pg):
    """Exercise the variable- and fixed-numbered custom statistics kinds.

    Stats are created, updated, reported and dropped; they must survive a
    clean restart, be gone after an immediate-mode stop and restart, and the
    fixed-numbered counter must read zero after a manual reset.
    """
    var_report = "select * from test_custom_stats_var_report('{}')"
    fixed_report = "select * from test_custom_stats_fixed_report()"
    fixed_update = "select test_custom_stats_fixed_update()"
    fixed_calls_after_reset = "select numcalls from test_custom_stats_fixed_report() where stats_reset is not null"
    # (entry name, number of updates applied to it)
    update_plan = (("entry1", 2), ("entry2", 3), ("entry3", 2), ("entry4", 3))

    node = create_pg("main", start=False)
    node.append_conf(
        "shared_preload_libraries = 'test_custom_var_stats, test_custom_fixed_stats'"
    )
    node.start()
    node.safe_psql("CREATE EXTENSION test_custom_var_stats")
    node.safe_psql("CREATE EXTENSION test_custom_fixed_stats")

    # Create all entries first, then apply the updates entry by entry.
    for number in range(1, 5):
        node.safe_psql(
            "select test_custom_stats_var_create('entry{n}', 'Test entry {n}')".format(
                n=number
            )
        )
    for entry, times in update_plan:
        for _ in range(times):
            node.safe_psql(
                "select test_custom_stats_var_update('{}')".format(entry)
            )
    for _ in range(3):
        node.safe_psql(fixed_update)

    for number, (entry, times) in enumerate(update_plan, start=1):
        row = node.safe_psql(var_report.format(entry))
        assert row == "{}|{}|Test entry {}".format(
            entry, times, number
        ), "report for variable-sized data of {}".format(entry)
    row = node.safe_psql(fixed_report)
    assert row == "3|", "report for fixed-sized stats"

    for entry in ("entry3", "entry4"):
        node.safe_psql(
            "select * from test_custom_stats_var_drop('{}')".format(entry)
        )
        row = node.safe_psql(var_report.format(entry))
        assert row == "", "{} not found after drop".format(entry)

    # Clean restart: the surviving entries keep their counters.
    node.stop()
    node.start()
    for entry, wanted in (
        ("entry1", "entry1|2|Test entry 1"),
        ("entry2", "entry2|3|Test entry 2"),
    ):
        row = node.safe_psql(var_report.format(entry))
        assert row == wanted, "variable-sized stats persist after clean restart"
    row = node.safe_psql(fixed_report)
    assert row == "3|", "fixed-sized stats persist after clean restart"

    # Immediate stop followed by a restart: the stats are expected to be gone.
    node.stop("immediate")
    node.start()
    for entry in ("entry1", "entry2"):
        row = node.safe_psql(var_report.format(entry))
        assert row == "", "variable-sized stats of {} lost after crash recovery".format(
            entry
        )
    row = node.safe_psql(fixed_calls_after_reset)
    assert row == "0", "fixed-sized stats are reset after crash recovery"

    for _ in range(3):
        node.safe_psql(fixed_update)
    row = node.safe_psql("select numcalls from test_custom_stats_fixed_report()")
    assert row == "3", "report of fixed-sized before manual reset"
    node.safe_psql("select test_custom_stats_fixed_reset()")
    row = node.safe_psql(fixed_calls_after_reset)
    assert row == "0", "report of fixed-sized after manual reset"
import re

import pytest


def test_001_test_escape(create_pg, pg_bin):
    """Run test_escape against a sql_ascii database and vet its TAP output.

    The program must exit 0 with empty stderr. Every stdout line has to be
    an "ok" line, a "#" comment, a plan line or empty; a "not ok" line or
    anything unrecognised fails the test.
    """
    passed = re.compile(r"^ok \d+ ?(.*)")
    failed = re.compile(r"^not ok \d+ ?(.*)")
    comment = re.compile(r"^# ?(.*)")
    plan = re.compile(r"^\d+\.\.\d+$")

    node = create_pg("node")
    node.safe_psql(
        'CREATE DATABASE db_sql_ascii ENCODING "sql_ascii" TEMPLATE template0;'
    )
    conninfo = node.connstr() + " dbname=db_sql_ascii"
    outcome = pg_bin.result(["test_escape", "--conninfo", conninfo])
    assert outcome.rc == 0, "test_escape returns 0"
    assert outcome.stderr == "", "test_escape stderr is empty"

    for tap_line in outcome.stdout.split("\n"):
        if passed.match(tap_line):
            continue
        failure = failed.match(tap_line)
        if failure:
            # Report the description that follows "not ok N".
            pytest.fail(failure.group(1))
        if comment.match(tap_line) or plan.match(tap_line) or tap_line == "":
            continue
        pytest.fail("no unmapped lines, got {}".format(tap_line))
import os
import re
import tempfile


def _create_extension(ext_name, ext_dir, directory=None):
    """Write a minimal control file and 1.0 install script for *ext_name*.

    The control file always goes to ``<ext_dir>/extension/``. The SQL script
    goes to ``<ext_dir>/<directory>/`` when *directory* is given (and the
    control file then carries a matching ``directory`` setting), otherwise
    next to the control file. The target directories must already exist.
    """
    control_file = "{}/extension/{}.control".format(ext_dir, ext_name)
    if directory is not None:
        sql_file = "{}/{}/{}--1.0.sql".format(ext_dir, directory, ext_name)
    else:
        sql_file = "{}/extension/{}--1.0.sql".format(ext_dir, ext_name)
    with open(control_file, "w", encoding="utf-8") as cf:
        cf.write("comment = 'Test extension_control_path'\n")
        cf.write("default_version = '1.0'\n")
        cf.write("relocatable = true\n")
        if directory is not None:
            cf.write("directory = {}".format(directory))
    with open(sql_file, "w", encoding="utf-8") as sqlf:
        sqlf.write("/* {} */\n".format(sql_file))
        sqlf.write(
            "-- complain if script is sourced in psql, rather than via "
            "CREATE EXTENSION\n"
        )
        sqlf.write(
            '\\echo Use "CREATE EXTENSION {}" to load this file. \\quit\n'.format(
                ext_name
            )
        )


def _conf_escape(path):
    """Double backslashes so *path* survives a single-quoted conf value."""
    return path.replace("\\", "\\\\")


def _check_extension_control_path(create_pg, ext_dir, ext_dir2):
    """Run the extension_control_path checks using the two given directories."""
    node = create_pg("node", auth_extra=["--create-role", "user01"], start=False)
    os.makedirs("{}/extension".format(ext_dir))
    os.makedirs("{}/extension".format(ext_dir2))
    ext_name = "test_custom_ext_paths"
    _create_extension(ext_name, ext_dir)
    _create_extension(ext_name, ext_dir2)
    ext_name2 = "test_custom_ext_paths_using_directory"
    os.makedirs("{}/{}".format(ext_dir, ext_name2))
    _create_extension(ext_name2, ext_dir, ext_name2)

    # The GUC is a list separated by the platform's path-list separator
    # (":" on POSIX, ";" on Windows), which is exactly os.pathsep.
    sep = os.pathsep
    node.append_conf(
        "\nextension_control_path = '$system{s}{d1}{s}{d2}'\n".format(
            s=sep, d1=_conf_escape(ext_dir), d2=_conf_escape(ext_dir2)
        )
    )
    node.start()
    user = "user01"
    node.safe_psql("CREATE USER {}".format(user))
    ecp = node.safe_psql("show extension_control_path;")
    assert ecp == "$system{s}{d1}{s}{d2}".format(
        s=sep, d1=ext_dir, d2=ext_dir2
    ), "custom extension control directory path configured"
    node.safe_psql("CREATE EXTENSION {}".format(ext_name))
    node.safe_psql("CREATE EXTENSION {}".format(ext_name2))
    assert node.safe_psql(
        "select * from pg_available_extensions where name = '{}'".format(ext_name)
    ) == "test_custom_ext_paths|1.0|1.0|{}/extension|Test extension_control_path".format(
        ext_dir
    ), "extension is shown correctly in pg_available_extensions"
    assert node.safe_psql(
        "select * from pg_available_extension_versions where name = '{}'".format(
            ext_name
        )
    ) == "test_custom_ext_paths|1.0|t|t|f|t|||{}/extension|Test extension_control_path".format(
        ext_dir
    ), "extension is shown correctly in pg_available_extension_versions"
    assert node.safe_psql(
        "select * from pg_available_extensions where name = '{}'".format(ext_name2)
    ) == "test_custom_ext_paths_using_directory|1.0|1.0|{}/extension|Test extension_control_path".format(
        ext_dir
    ), "extension is shown correctly in pg_available_extensions"
    assert node.safe_psql(
        "select * from pg_available_extension_versions where name = '{}'".format(
            ext_name2
        )
    ) == "test_custom_ext_paths_using_directory|1.0|t|t|f|t|||{}/extension|Test extension_control_path".format(
        ext_dir
    ), "extension is shown correctly in pg_available_extension_versions"

    # Connect as the unprivileged role: the location column must be empty.
    unprivileged = node.connstr("postgres") + " user=" + user
    assert (
        node.psql_capture(
            "select location from pg_available_extensions where name = '{}'".format(
                ext_name2
            ),
            connstr=unprivileged,
        ).stdout
        == ""
    ), (
        "extension location is hidden in pg_available_extensions for users with "
        "insufficient privilege"
    )
    assert (
        node.psql_capture(
            "select location from pg_available_extension_versions where name = '{}'".format(
                ext_name2
            ),
            connstr=unprivileged,
        ).stdout
        == ""
    ), (
        "extension location is hidden in pg_available_extension_versions for "
        "users with insufficient privilege"
    )
    assert (
        node.safe_psql(
            "select count(*) > 0 as ok from pg_available_extensions where name = 'plpgsql'"
        )
        == "t"
    ), "$system extension is shown correctly in pg_available_extensions"
    assert (
        node.safe_psql(
            "set extension_control_path = ''; select location from "
            "pg_available_extensions where name = 'plpgsql'"
        )
        == "$system"
    ), (
        "$system location is shown correctly in pg_available_extensions with "
        "empty extension_control_path"
    )
    res = node.psql_capture("CREATE EXTENSION invalid")
    assert res.rc == 3, "error creating an extension that does not exist"
    # Tolerate any amount of whitespace after the severity prefix.
    assert re.search(
        r'ERROR:\s+extension "invalid" is not available', res.stderr
    ), "error message for an extension that does not exist"


def test_001_extension_control_path(create_pg):
    """Custom extension_control_path directories are honored and access-gated.

    The two scratch directories holding the fake extensions are removed
    again when the test finishes, whether it passes or fails.
    """
    with tempfile.TemporaryDirectory(
        prefix="ecp1_"
    ) as ext_dir, tempfile.TemporaryDirectory(prefix="ecp2_") as ext_dir2:
        _check_extension_control_path(create_pg, ext_dir, ext_dir2)
import pytest


def test_001_test_int128(pg_bin):
    """Run test_int128 on one million samples and require silent output.

    Skips when the program's stdout contains "skipping tests".
    """
    sample_count = 1_000_000
    argv = ["test_int128", str(sample_count)]
    outcome = pg_bin.run_command(argv)
    if outcome.stdout.find("skipping tests") != -1:
        pytest.skip("no native int128 type")
    for stream_name in ("stdout", "stderr"):
        captured = getattr(outcome, stream_name)
        assert captured == "", "test_int128: no {}".format(stream_name)
import os
import re

# Every parser binary is exercised twice: plain and with the -o option.
_EXES = tuple(
    [program] + options
    for program in (
        "test_json_parser_incremental",
        "test_json_parser_incremental_shlib",
    )
    for options in ([], ["-o"])
)
_TEST_FILE = os.path.join(os.path.dirname(__file__), "..", "tiny.json")


def test_001_test_json_parser_incremental(pg_bin):
    """Check the usage error, then parse tiny.json at chunk sizes 64 down to 1."""
    for base_cmd in _EXES:
        # Without an input file the program must print its usage text.
        no_file = pg_bin.run_command(base_cmd + ["-c", "10"])
        usage_shown = re.search(r"Usage:", no_file.stderr)
        assert usage_shown, "error message if not enough arguments"

        for chunk in reversed(range(1, 65)):
            parsed = pg_bin.run_command(base_cmd + ["-c", str(chunk), _TEST_FILE])
            succeeded = re.search(r"SUCCESS", parsed.stdout)
            assert succeeded, "chunk size {}: test succeeds".format(chunk)
            assert parsed.stderr == "", "chunk size {}: no error output".format(
                chunk
            )
+ +Drives the incremental JSON parser (in-tree and shlib builds, with and without +the -o semantic option) over many inline inputs in -r loop mode, which reparses +the input at every chunk size from min(len, 64) down to 1 and emits one +null-separated result per size. Each valid input must succeed at every chunk +size with no stderr; each invalid input must fail with the expected error. +""" + +import re +import subprocess + +_EXES = ( + ["test_json_parser_incremental"], + ["test_json_parser_incremental", "-o"], + ["test_json_parser_incremental_shlib"], + ["test_json_parser_incremental_shlib", "-o"], +) + +# (name, json input as bytes, expected error regex or None) extracted verbatim +# from the Perl test (Perl qr// (?^:...) wrappers stripped, \x{F5} -> \xf5). +_CASES = ( + ("number", b"12345", None), + ("string", b'"hello"', None), + ("false", b"false", None), + ("true", b"true", None), + ("null", b"null", None), + ("empty object", b"{}", None), + ("empty array", b"[]", None), + ("array with number", b"[12345]", None), + ("array with numbers", b"[12345,67890]", None), + ("array with null", b"[null]", None), + ("array with string", b'["hello"]', None), + ("array with boolean", b"[false]", None), + ("single pair", b'{"key": "value"}', None), + ( + "heavily nested array", + 
b"[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[
[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]
]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]
]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]", + None, + ), + ("serial escapes", b'"\\\\\\\\\\\\\\\\"', None), + ("interrupted escapes", b'"\\\\\\"\\\\\\\\\\"\\\\"', None), + ("whitespace", b' "" ', None), + ("unclosed empty object", b"{", "input string ended unexpectedly"), + ("bad key", b"{{", 'Expected string or "}", but found "\\{"'), + ("bad key", b"{{}", 'Expected string or "}", but found "\\{"'), + ("numeric key", b"{1234: 2}", 'Expected string or "}", but found "1234"'), + ("second numeric key", b'{"a": "a", 1234: 2}', 'Expected string, but found "1234"'), + ( + "unclosed object with pair", + b'{"key": "value"', + "input string ended unexpectedly", + ), + ("missing key value", b'{"key": }', 'Expected JSON value, but found "}"'), + ("missing colon", b'{"key" 12345}', 'Expected ":", but found "12345"'), + ( + "missing comma", + b'{"key": 12345 12345}', + 'Expected "," or "}", but found "12345"', + ), + ( + "overnested array", + 
b"[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[
[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[
[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[
[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[", + "maximum permitted depth is 6400", + ), + ("overclosed array", b"[]]", 'Expected end of input, but found "]"'), + ( + "unexpected token in array", + b"[ }}} ]", + 'Expected array element or "]", but found "}"', + ), + ("junk punctuation", b"[ ||| ]", 'Token "|" is invalid'), + ("missing comma in array", b"[123 123]", 'Expected "," or "]", but found "123"'), + ("misspelled boolean", b"tru", 'Token "tru" is invalid'), + ("misspelled boolean in array", b"[tru]", 'Token "tru" is invalid'), + ("smashed top-level scalar", b"12zz", 'Token "12zz" is invalid'), + ("smashed scalar in array", b"[12zz]", 'Token "12zz" is invalid'), + ( + "unknown escape sequence", + b'"hello\\vworld"', + 'Escape sequence "\\\\v" is invalid', + ), + ( + "unescaped control", + b'"hello\tworld"', + "Character with value 0x09 must be escaped", + ), + ( + "incorrect escape count", + b'"\\\\\\\\\\\\\\"', + 'Token ""\\\\\\\\\\\\\\\\\\\\\\\\\\\\"" is invalid', + ), + ( + "incomplete UTF-8 sequence", + b'"\\\xf5', + '(Token|Escape sequence) ""?\\\\\\xf5" is invalid', + ), +) + + +def _split_nul(text): + """Split null-separated parser output (Perl unpack '(Z*)*'), dropping a + single trailing empty produced by a terminating null. 
+ """ + parts = text.split("\0") + if parts and parts[-1] == "": + parts.pop() + return parts + + +def _run_case(exe, name, json_bytes, error, tmp_path): + """Run one inline case across all chunk sizes and check each result.""" + chunk = min(len(json_bytes), 64) + fname = tmp_path / "inline.json" + fname.write_bytes(json_bytes) + proc = subprocess.run( + exe + ["-r", str(chunk), str(fname)], + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + check=False, + ) + stdout = _split_nul(proc.stdout.decode("latin-1")) + stderr = _split_nul(proc.stderr.decode("latin-1")) + assert len(stdout) == chunk, "{}: stdout has correct number of entries".format(name) + assert len(stderr) == chunk, "{}: stderr has correct number of entries".format(name) + for i, size in enumerate(reversed(range(1, chunk + 1))): + if error is not None: + assert not re.search( + "SUCCESS", stdout[i] + ), "{}, chunk size {}: test fails".format(name, size) + assert re.search( + error, stderr[i] + ), "{}, chunk size {}: correct error output".format(name, size) + else: + assert re.search( + "SUCCESS", stdout[i] + ), "{}, chunk size {}: test succeeds".format(name, size) + assert stderr[i] == "", "{}, chunk size {}: no error output".format( + name, size + ) + + +def test_002_inline(tmp_path): + """Every inline JSON case parses correctly at all chunk sizes, all variants.""" + for exe in _EXES: + for name, json_bytes, error in _CASES: + _run_case(exe, name, json_bytes, error, tmp_path) diff --git a/src/test/modules/test_json_parser/pyt/test_003_test_semantic.py b/src/test/modules/test_json_parser/pyt/test_003_test_semantic.py new file mode 100644 index 0000000000000..b4813718a029e --- /dev/null +++ b/src/test/modules/test_json_parser/pyt/test_003_test_semantic.py @@ -0,0 +1,28 @@ +# Copyright (c) 2021-2026, PostgreSQL Global Development Group + +"""Port of src/test/modules/test_json_parser/t/003_test_semantic.pl. 
# The incremental JSON parser driven with semantic routines (all four
# executable variants) produces output identical to the expected tiny.out
# for tiny.json.

import os

# Four command prefixes: each parser build, without and with the -o option.
_EXES = tuple(
    [program] + flags
    for program in (
        "test_json_parser_incremental",
        "test_json_parser_incremental_shlib",
    )
    for flags in ([], ["-o"])
)
# Input and expected-output files live one directory above this module.
_TEST_FILE = os.path.join(os.path.dirname(__file__), "..", "tiny.json")
_TEST_OUT = os.path.join(os.path.dirname(__file__), "..", "tiny.out")


def test_003_test_semantic(pg_bin):
    """Compare -s (semantic) output for tiny.json with tiny.out, per variant.

    Every command prefix in _EXES must leave stderr empty and produce stdout
    that, with one newline appended, equals the contents of tiny.out.
    """
    with open(_TEST_OUT, encoding="utf-8") as expected_file:
        expected_text = expected_file.read()

    for argv in _EXES:
        outcome = pg_bin.run_command(argv + ["-s", _TEST_FILE])
        assert outcome.stderr == "", "no error output"
        # A newline is appended to the captured stdout before comparing it
        # with the expected file.
        produced_text = "".join((outcome.stdout, "\n"))
        assert produced_text == expected_text, "no output diff"
# The JSON parser performance harness runs to a clean exit over a
# 50x-replicated input with both the recursive-descent and table-driven
# parsers.

import os

import pypg

# tiny.json lives one directory above this test module.
_TEST_FILE = os.path.join(os.path.dirname(__file__), "..", "tiny.json")


def test_004_test_parser_perf(pg_bin, tmp_path):
    """Run the perf harness once per parser flavour and require exit code 0.

    The input is a JSON array holding 50 copies of tiny.json, written to a
    scratch file under tmp_path. The harness is run for one iteration
    without and then with the -i option.
    """
    element = pypg.slurp_file(_TEST_FILE)
    perf_input = tmp_path / "perf.json"
    # 50 comma-separated copies wrapped in a single array.
    perf_input.write_text(
        "[" + ",".join([element] * 50) + "]", encoding="utf-8"
    )

    for extra_args, message in (
        ([], "perf test runs with recursive descent parser"),
        (["-i"], "perf test runs with table driven parser"),
    ):
        outcome = pg_bin.result(
            ["test_json_parser_perf"] + extra_args + ["1", str(perf_input)]
        )
        assert outcome.rc == 0, message
'yes' : 'no', diff --git a/src/test/modules/test_misc/pyt/test_001_constraint_validation.py b/src/test/modules/test_misc/pyt/test_001_constraint_validation.py new file mode 100644 index 0000000000000..2a3c3414bb915 --- /dev/null +++ b/src/test/modules/test_misc/pyt/test_001_constraint_validation.py @@ -0,0 +1,380 @@ +# Copyright (c) 2021-2026, PostgreSQL Global Development Group + +# pylint: disable=line-too-long,too-many-statements +"""Port of src/test/modules/test_misc/t/001_constraint_validation.pl. + +Verify that ALTER TABLE optimizes certain operations as expected. + +With client_min_messages = DEBUG1, ALTER TABLE emits "verifying table" when it +performs a validation scan, and "existing constraints ... are sufficient to +prove ..." / "partition constraint ... is implied by existing constraints" +when it can skip the scan. Each scenario runs DDL and inspects psql's stderr +(which carries the DEBUG output) to confirm whether a scan happened. +""" + +import re + + +def test_001_constraint_validation(create_pg): + """ALTER TABLE skips/performs validation scans as expected.""" + # Initialize a test cluster + node = create_pg("primary", start=False) + # Turn message level up to DEBUG1 so that we get the messages we want to see + node.append_conf("client_min_messages = DEBUG1") + node.start() + + def run_sql_command(sql): + """Run a SQL command and return psql's stderr (including debug + messages), mirroring the Perl run_sql_command sub (on_error_die).""" + result = node.psql_capture(sql, on_error_stop=True) + assert result.rc == 0, "psql failed: {}".format(result.stderr) + return result.stderr + + def is_table_verified(output): + """Check whether result of run_sql_command shows that we did a verify + pass (mirrors the Perl is_table_verified sub).""" + return "DEBUG: verifying table" in output + + def like(output, pattern, _msg): + assert re.search(pattern, output), "{}\noutput:\n{}".format(_msg, output) + + def unlike(output, pattern, _msg): + assert not 
re.search(pattern, output), "{}\noutput:\n{}".format(_msg, output) + + # note "test alter table set not null"; + + run_sql_command( + "create table atacc1 (test_a int, test_b int);\n" + "\t insert into atacc1 values (1, 2);" + ) + + output = run_sql_command("alter table atacc1 alter test_a set not null;") + assert is_table_verified(output), "column test_a without constraint will scan table" + + run_sql_command( + "alter table atacc1 alter test_a drop not null;\n" + "\t alter table atacc1 add constraint atacc1_constr_a_valid\n" + "\t check(test_a is not null);" + ) + + # normal run will verify table data + output = run_sql_command("alter table atacc1 alter test_a set not null;") + assert not is_table_verified(output), "with constraint will not scan table" + like( + output, + r'existing constraints on column "atacc1.test_a" are sufficient to prove that it does not contain nulls', + "test_a proved by constraints", + ) + + run_sql_command("alter table atacc1 alter test_a drop not null;") + + # we have check only for test_a column, so we need verify table for test_b + output = run_sql_command( + "alter table atacc1 alter test_b set not null, alter test_a set not null;" + ) + assert is_table_verified(output), "table was scanned" + # we may miss debug message for test_a constraint because we need verify + # table due test_b + unlike( + output, + r'existing constraints on column "atacc1.test_b" are sufficient to prove that it does not contain nulls', + "test_b not proved by wrong constraints", + ) + run_sql_command( + "alter table atacc1 alter test_a drop not null, alter test_b drop not null;" + ) + + # test with both columns having check constraints + run_sql_command( + "alter table atacc1 add constraint atacc1_constr_b_valid check(test_b is not null);" + ) + output = run_sql_command( + "alter table atacc1 alter test_b set not null, alter test_a set not null;" + ) + assert not is_table_verified(output), "table was not scanned for both columns" + like( + output, + 
r'existing constraints on column "atacc1.test_a" are sufficient to prove that it does not contain nulls', + "test_a proved by constraints", + ) + like( + output, + r'existing constraints on column "atacc1.test_b" are sufficient to prove that it does not contain nulls', + "test_b proved by constraints", + ) + run_sql_command("drop table atacc1;") + + # note "test alter table attach partition"; + + run_sql_command( + "CREATE TABLE list_parted2 (\n" + "\ta int,\n" + "\tb char\n" + "\t) PARTITION BY LIST (a);\n" + "\tCREATE TABLE part_3_4 (\n" + "\tLIKE list_parted2,\n" + "\tCONSTRAINT check_a CHECK (a IN (3)));" + ) + + # need NOT NULL to skip table scan + output = run_sql_command( + "ALTER TABLE list_parted2 ATTACH PARTITION part_3_4 FOR VALUES IN (3, 4);" + ) + assert is_table_verified(output), "table part_3_4 scanned" + + run_sql_command( + "ALTER TABLE list_parted2 DETACH PARTITION part_3_4;\n" + "\t ALTER TABLE part_3_4 ALTER a SET NOT NULL;" + ) + + output = run_sql_command( + "ALTER TABLE list_parted2 ATTACH PARTITION part_3_4 FOR VALUES IN (3, 4);" + ) + assert not is_table_verified(output), "table part_3_4 not scanned" + like( + output, + r'partition constraint for table "part_3_4" is implied by existing constraints', + "part_3_4 verified by existing constraints", + ) + + # test attach default partition + run_sql_command( + "CREATE TABLE list_parted2_def (\n" + "\tLIKE list_parted2,\n" + "\tCONSTRAINT check_a CHECK (a IN (5, 6)));" + ) + output = run_sql_command( + "ALTER TABLE list_parted2 ATTACH PARTITION list_parted2_def default;" + ) + assert not is_table_verified(output), "table list_parted2_def not scanned" + like( + output, + r'partition constraint for table "list_parted2_def" is implied by existing constraints', + "list_parted2_def verified by existing constraints", + ) + + output = run_sql_command( + "CREATE TABLE part_55_66 PARTITION OF list_parted2 FOR VALUES IN (55, 66);" + ) + assert not is_table_verified(output), "table list_parted2_def not 
scanned" + like( + output, + r'updated partition constraint for default partition "list_parted2_def" is implied by existing constraints', + "updated partition constraint for default partition list_parted2_def", + ) + + # test attach another partitioned table + run_sql_command( + "CREATE TABLE part_5 (\n" + "\tLIKE list_parted2\n" + "\t) PARTITION BY LIST (b);\n" + "\tCREATE TABLE part_5_a PARTITION OF part_5 FOR VALUES IN ('a');\n" + "\tALTER TABLE part_5 ADD CONSTRAINT check_a CHECK (a IS NOT NULL AND a = 5);" + ) + output = run_sql_command( + "ALTER TABLE list_parted2 ATTACH PARTITION part_5 FOR VALUES IN (5);" + ) + unlike(output, r'verifying table "part_5"', "table part_5 not scanned") + like( + output, + r'verifying table "list_parted2_def"', + "list_parted2_def scanned", + ) + like( + output, + r'partition constraint for table "part_5" is implied by existing constraints', + "part_5 verified by existing constraints", + ) + + run_sql_command( + "ALTER TABLE list_parted2 DETACH PARTITION part_5;\n" + "\t ALTER TABLE part_5 DROP CONSTRAINT check_a;" + ) + + # scan should again be skipped, even though NOT NULL is now a column property + run_sql_command( + "ALTER TABLE part_5 ADD CONSTRAINT check_a CHECK (a IN (5)),\n" + "\t ALTER a SET NOT NULL;" + ) + output = run_sql_command( + "ALTER TABLE list_parted2 ATTACH PARTITION part_5 FOR VALUES IN (5);" + ) + unlike(output, r'verifying table "part_5"', "table part_5 not scanned") + like( + output, + r'verifying table "list_parted2_def"', + "list_parted2_def scanned", + ) + like( + output, + r'partition constraint for table "part_5" is implied by existing constraints', + "part_5 verified by existing constraints", + ) + + # Check the case where attnos of the partitioning columns in the table being + # attached differs from the parent. It should not affect the constraint- + # checking logic that allows to skip the scan. 
+ run_sql_command( + "CREATE TABLE part_6 (\n" + "\tc int,\n" + "\tLIKE list_parted2,\n" + "\tCONSTRAINT check_a CHECK (a IS NOT NULL AND a = 6)\n" + "\t);\n" + "\tALTER TABLE part_6 DROP c;" + ) + output = run_sql_command( + "ALTER TABLE list_parted2 ATTACH PARTITION part_6 FOR VALUES IN (6);" + ) + unlike(output, r'verifying table "part_6"', "table part_6 not scanned") + like( + output, + r'verifying table "list_parted2_def"', + "list_parted2_def scanned", + ) + like( + output, + r'partition constraint for table "part_6" is implied by existing constraints', + "part_6 verified by existing constraints", + ) + + # Similar to above, but the table being attached is a partitioned table + # whose partition has still different attnos for the root partitioning + # columns. + run_sql_command( + "CREATE TABLE part_7 (\n" + "\tLIKE list_parted2,\n" + "\tCONSTRAINT check_a CHECK (a IS NOT NULL AND a = 7)\n" + "\t) PARTITION BY LIST (b);\n" + "\tCREATE TABLE part_7_a_null (\n" + "\tc int,\n" + "\td int,\n" + "\te int,\n" + "\tLIKE list_parted2, -- a will have attnum = 4\n" + "\tCONSTRAINT check_b CHECK (b IS NULL OR b = 'a'),\n" + "\tCONSTRAINT check_a CHECK (a IS NOT NULL AND a = 7)\n" + "\t);\n" + "\tALTER TABLE part_7_a_null DROP c, DROP d, DROP e;" + ) + + output = run_sql_command( + "ALTER TABLE part_7 ATTACH PARTITION part_7_a_null FOR VALUES IN ('a', null);" + ) + assert not is_table_verified(output), "table not scanned" + like( + output, + r'partition constraint for table "part_7_a_null" is implied by existing constraints', + "part_7_a_null verified by existing constraints", + ) + output = run_sql_command( + "ALTER TABLE list_parted2 ATTACH PARTITION part_7 FOR VALUES IN (7);" + ) + assert not is_table_verified(output), "tables not scanned" + like( + output, + r'partition constraint for table "part_7" is implied by existing constraints', + "part_7 verified by existing constraints", + ) + like( + output, + r'updated partition constraint for default partition 
"list_parted2_def" is implied by existing constraints', + "updated partition constraint for default partition list_parted2_def", + ) + + run_sql_command( + "CREATE TABLE range_parted (\n" + "\ta int,\n" + "\tb int\n" + "\t) PARTITION BY RANGE (a, b);\n" + "\tCREATE TABLE range_part1 (\n" + "\ta int NOT NULL CHECK (a = 1),\n" + "\tb int NOT NULL);" + ) + + output = run_sql_command( + "ALTER TABLE range_parted ATTACH PARTITION range_part1 FOR VALUES FROM (1, 1) TO (1, 10);" + ) + assert is_table_verified(output), "table range_part1 scanned" + unlike( + output, + r'partition constraint for table "range_part1" is implied by existing constraints', + "range_part1 not verified by existing constraints", + ) + + run_sql_command( + "CREATE TABLE range_part2 (\n" + "\ta int NOT NULL CHECK (a = 1),\n" + "\tb int NOT NULL CHECK (b >= 10 and b < 18)\n" + ");" + ) + output = run_sql_command( + "ALTER TABLE range_parted ATTACH PARTITION range_part2 FOR VALUES FROM (1, 10) TO (1, 20);" + ) + assert not is_table_verified(output), "table range_part2 not scanned" + like( + output, + r'partition constraint for table "range_part2" is implied by existing constraints', + "range_part2 verified by existing constraints", + ) + + # If a partitioned table being created or an existing table being attached + # as a partition does not have a constraint that would allow validation scan + # to be skipped, but an individual partition does, then the partition's + # validation scan is skipped. 
+ run_sql_command( + "CREATE TABLE quuux (a int, b text) PARTITION BY LIST (a);\n" + "\tCREATE TABLE quuux_default PARTITION OF quuux DEFAULT PARTITION BY LIST (b);\n" + "\tCREATE TABLE quuux_default1 PARTITION OF quuux_default (\n" + "\tCONSTRAINT check_1 CHECK (a IS NOT NULL AND a = 1)\n" + "\t) FOR VALUES IN ('b');\n" + "\tCREATE TABLE quuux1 (a int, b text);" + ) + + output = run_sql_command( + "ALTER TABLE quuux ATTACH PARTITION quuux1 FOR VALUES IN (1);" + ) + assert is_table_verified(output), "quuux1 table scanned" + unlike( + output, + r'partition constraint for table "quuux1" is implied by existing constraints', + "quuux1 verified by existing constraints", + ) + + run_sql_command("CREATE TABLE quuux2 (a int, b text);") + output = run_sql_command( + "ALTER TABLE quuux ATTACH PARTITION quuux2 FOR VALUES IN (2);" + ) + unlike( + output, + r'verifying table "quuux_default1"', + "quuux_default1 not scanned", + ) + like(output, r'verifying table "quuux2"', "quuux2 scanned") + like( + output, + r'updated partition constraint for default partition "quuux_default1" is implied by existing constraints', + "updated partition constraint for default partition quuux_default1", + ) + run_sql_command("DROP TABLE quuux1, quuux2;") + + # should validate for quuux1, but not for quuux2 + output = run_sql_command( + "CREATE TABLE quuux1 PARTITION OF quuux FOR VALUES IN (1);" + ) + assert not is_table_verified(output), "tables not scanned" + unlike( + output, + r'partition constraint for table "quuux1" is implied by existing constraints', + "quuux1 verified by existing constraints", + ) + output = run_sql_command( + "CREATE TABLE quuux2 PARTITION OF quuux FOR VALUES IN (2);" + ) + assert not is_table_verified(output), "tables not scanned" + like( + output, + r'updated partition constraint for default partition "quuux_default1" is implied by existing constraints', + "updated partition constraint for default partition quuux_default1", + ) + run_sql_command("DROP TABLE quuux;") + + 
node.stop("fast") diff --git a/src/test/modules/test_misc/pyt/test_002_tablespace.py b/src/test/modules/test_misc/pyt/test_002_tablespace.py new file mode 100644 index 0000000000000..344b1f70af533 --- /dev/null +++ b/src/test/modules/test_misc/pyt/test_002_tablespace.py @@ -0,0 +1,85 @@ +# Copyright (c) 2021-2026, PostgreSQL Global Development Group + +"""Port of src/test/modules/test_misc/t/002_tablespace.pl. + +CREATE/DROP TABLESPACE and moving tables between tablespaces, covering +absolute-path tablespaces and in-place tablespaces (allow_in_place_tablespaces), +including the expected failures (clobbering an existing tablespace, dropping a +non-empty tablespace). +""" + +import os + + +def _ok(node, sql, msg): + assert node.psql_capture(sql).rc == 0, msg + + +def _fail(node, sql, msg): + assert node.psql_capture(sql).rc != 0, msg + + +def test_002_tablespace(create_pg): + """Tablespace create/drop and table moves across abs and in-place dirs.""" + node = create_pg("main") + ts1 = os.path.join(node.basedir, "ts1") + ts2 = os.path.join(node.basedir, "ts2") + os.mkdir(ts1) + os.mkdir(ts2) + _ok( + node, + "CREATE TABLESPACE regress_ts1 LOCATION '{}'".format(ts1), + "create tablespace with absolute path", + ) + _fail( + node, + "CREATE TABLESPACE regress_ts1 LOCATION '{}'".format(ts1), + "clobber tablespace with absolute path", + ) + _ok( + node, + "CREATE TABLE t () TABLESPACE regress_ts1", + "create table in tablespace with absolute path", + ) + _fail(node, "DROP TABLESPACE regress_ts1", "drop non-empty tablespace fails") + _ok(node, "DROP TABLE t", "drop table in tablespace with absolute path") + _ok(node, "DROP TABLESPACE regress_ts1", "drop tablespace with absolute path") + _ok( + node, + "CREATE TABLESPACE regress_ts1 LOCATION '{}'".format(ts1), + "create tablespace 1 with absolute path", + ) + _ok( + node, + "CREATE TABLESPACE regress_ts2 LOCATION '{}'".format(ts2), + "create tablespace 2 with absolute path", + ) + _ok( + node, + "SET 
allow_in_place_tablespaces=on; CREATE TABLESPACE regress_ts3 " + "LOCATION ''", + "create tablespace 3 with in-place directory", + ) + _ok( + node, + "SET allow_in_place_tablespaces=on; CREATE TABLESPACE regress_ts4 " + "LOCATION ''", + "create tablespace 4 with in-place directory", + ) + _ok(node, "CREATE TABLE t () TABLESPACE regress_ts1", "create table in ts1") + _ok(node, "ALTER TABLE t SET tablespace regress_ts2", "move table abs->abs") + _ok(node, "ALTER TABLE t SET tablespace regress_ts3", "move table abs->in-place") + _ok( + node, + "ALTER TABLE t SET tablespace regress_ts4", + "move table in-place->in-place", + ) + _ok(node, "ALTER TABLE t SET tablespace regress_ts1", "move table in-place->abs") + _ok(node, "DROP TABLE t", "drop table in ts1") + for i in (1, 2, 3, 4): + _ok( + node, + "DROP TABLESPACE regress_ts{}".format(i), + "drop tablespace {}".format(i), + ) + node.stop() diff --git a/src/test/modules/test_misc/pyt/test_003_check_guc.py b/src/test/modules/test_misc/pyt/test_003_check_guc.py new file mode 100644 index 0000000000000..62d751ba8a87a --- /dev/null +++ b/src/test/modules/test_misc/pyt/test_003_check_guc.py @@ -0,0 +1,64 @@ +# Copyright (c) 2021-2026, PostgreSQL Global Development Group + +"""Port of src/test/modules/test_misc/t/003_check_guc.pl. + +postgresql.conf.sample must stay in sync with guc_tables.c: every in-sample GUC +appears in the file (and vice versa), no GUC marked NOT_IN_SAMPLE appears in the +file, and the file has no tab characters. 
+""" + +import os +import re + + +def test_003_check_guc(create_pg): + """postgresql.conf.sample lists exactly the in-sample GUCs, no tabs.""" + node = create_pg("main") + all_params = ( + node.safe_psql( + "SELECT name FROM pg_settings\n" + "WHERE NOT 'NOT_IN_SAMPLE' = ANY (pg_settings_get_flags(name)) AND\n" + "name <> 'config_file' AND category <> 'Customized Options'\n" + "ORDER BY 1" + ) + .lower() + .split("\n") + ) + not_in_sample = set( + node.safe_psql( + "SELECT name FROM pg_settings\n" + "WHERE 'NOT_IN_SAMPLE' = ANY (pg_settings_get_flags(name))\nORDER BY 1" + ) + .lower() + .split("\n") + ) + share_dir = node.config_data("--sharedir") + sample_file = os.path.join(share_dir, "postgresql.conf.sample") + gucs_in_file = [] + lines_with_tabs = [] + ignore = {"include", "include_dir", "include_if_exists"} + with open(sample_file, encoding="utf-8") as fh: + for line_num, line in enumerate(fh, start=1): + if "\t" in line: + lines_with_tabs.append(line_num) + match = re.match(r"^#([_a-zA-Z0-9]+) = .*", line) + if match: + name = match.group(1).lower() + if name not in ignore: + gucs_in_file.append(name) + continue + assert not re.match( + r"^\s*[^#\s]", line + ), "{} missing initial # in postgresql.conf.sample".format(line) + gucs_set = set(gucs_in_file) + all_set = set(all_params) + assert [ + p for p in all_params if p not in gucs_set + ] == [], "no parameters missing from postgresql.conf.sample" + assert [ + p for p in gucs_in_file if p not in all_set + ] == [], "no parameters missing from guc_tables.c" + assert [ + p for p in gucs_in_file if p in not_in_sample + ] == [], "no parameters marked as NOT_IN_SAMPLE in postgresql.conf.sample" + assert not lines_with_tabs, "no lines with tabs in postgresql.conf.sample" diff --git a/src/test/modules/test_misc/pyt/test_004_io_direct.py b/src/test/modules/test_misc/pyt/test_004_io_direct.py new file mode 100644 index 0000000000000..d7517905b8622 --- /dev/null +++ b/src/test/modules/test_misc/pyt/test_004_io_direct.py 
@@ -0,0 +1,62 @@ +# Copyright (c) 2021-2026, PostgreSQL Global Development Group + +"""Port of src/test/modules/test_misc/t/004_io_direct.pl. + +Exercises debug_io_direct: with tiny shared_buffers forcing real I/O, data is +written and read back through shared buffers and local (temp-table) buffers, and +survives an immediate-stop crash plus recovery. On non-macOS/Windows it first +checks that the filesystem under tmp_check actually supports opening a file with +O_DIRECT, skipping if not (matching the Perl pre-flight). +""" + +import os +import sys + +import pytest + + +def test_004_io_direct(create_pg, tmp_check): + """debug_io_direct round-trips data through shared/local buffers and recovery.""" + if sys.platform not in ("darwin", "win32"): + # Perl's Fcntl knows if this system has O_DIRECT in <fcntl.h>. + if hasattr(os, "O_DIRECT"): + # Can we open a file in O_DIRECT mode in the file system where + # tmp_check lives? + path = tmp_check / "test_o_direct_file" + try: + fd = os.open(str(path), os.O_RDWR | os.O_DIRECT | os.O_CREAT) + except OSError as exc: + pytest.skip( + "pre-flight test if we can open a file with O_DIRECT " + "failed: {}".format(exc) + ) + os.close(fd) + else: + pytest.skip("no O_DIRECT") + node = create_pg("main", start=False) + node.append_conf( + "\n" + "debug_io_direct = 'data,wal,wal_init'\n" + "shared_buffers = '256kB' # tiny to force I/O\n" + "wal_level = replica # minimal runs out of shared_buffers when set so tiny\n" + ) + node.start() + node.safe_psql("create table t1 as select 1 as i from generate_series(1, 10000)") + node.safe_psql("create table t2count (i int)") + node.safe_psql( + "\n" + "begin;\n" + "create temporary table t2 as select 1 as i from generate_series(1, 10000);\n" + "update t2 set i = i;\n" + "insert into t2count select count(*) from t2;\n" + "commit;\n" + ) + node.safe_psql("update t1 set i = i") + assert node.safe_psql("select count(*) from t1") == "10000", "read back from shared" + assert node.safe_psql("select * from 
t2count") == "10000", "read back from local" + node.stop("immediate") + node.start() + assert ( + node.safe_psql("select count(*) from t1") == "10000" + ), "read back from shared after crash recovery" + node.stop() diff --git a/src/test/modules/test_misc/pyt/test_005_timeouts.py b/src/test/modules/test_misc/pyt/test_005_timeouts.py new file mode 100644 index 0000000000000..8e03039807504 --- /dev/null +++ b/src/test/modules/test_misc/pyt/test_005_timeouts.py @@ -0,0 +1,71 @@ +# Copyright (c) 2022-2026, PostgreSQL Global Development Group + +# pylint: disable=line-too-long,too-many-statements,implicit-str-concat +"""Port of src/test/modules/test_misc/t/005_timeouts.pl. + +FATAL timeout handling (transaction_timeout, idle_in_transaction_session_timeout, idle_session_timeout) using injection points to deterministically await the timeout; verifies the backend is terminated with the expected log message. Skips without injection points. +Generated from the Perl original via .agent/gen_golden.py. 
+""" + +import os +import pytest + + +def test_005_timeouts(create_pg): + """FATAL session/transaction timeout handling (gated on injection points).""" + if os.environ.get("enable_injection_points") != "yes": + pytest.skip("Injection points not supported by this build") + node = create_pg("master", start=False) + node.start() + if not node.check_extension("injection_points"): + pytest.skip("Extension injection_points not installed") + node.safe_psql("CREATE EXTENSION injection_points;") + node.safe_psql("SELECT injection_points_attach('transaction-timeout', 'wait');") + psql_session = node.background_psql("postgres") + psql_session.query_until( + r"""starting_bg_psql""", + "\\echo starting_bg_psql\n" + " SET transaction_timeout to '10ms';\n" + " BEGIN;\n" + " SELECT 1 \\watch 0.001\n" + " \\q\n", + ) + node.wait_for_event("client backend", "transaction-timeout") + log_offset = node.current_log_position() + node.safe_psql("SELECT injection_points_wakeup('transaction-timeout');") + node.wait_for_log( + r"""terminating connection due to transaction timeout""", log_offset + ) + psql_session.quit() + node.safe_psql( + "SELECT injection_points_attach('idle-in-transaction-session-timeout', 'wait');" + ) + psql_session = node.background_psql("postgres") + psql_session.query_until( + r"""starting_bg_psql""", + "\n \\echo starting_bg_psql\n" + " SET idle_in_transaction_session_timeout to '10ms';\n" + " BEGIN;\n", + ) + node.wait_for_event("client backend", "idle-in-transaction-session-timeout") + log_offset = node.current_log_position() + node.safe_psql( + "SELECT injection_points_wakeup('idle-in-transaction-session-timeout');" + ) + node.wait_for_log( + r"""terminating connection due to idle-in-transaction timeout""", log_offset + ) + assert psql_session.quit() == 0, "" + node.safe_psql("SELECT injection_points_attach('idle-session-timeout', 'wait');") + psql_session = node.background_psql("postgres") + psql_session.query_until( + r"""starting_bg_psql""", + "\n \\echo 
starting_bg_psql\n" " SET idle_session_timeout to '10ms';\n", + ) + node.wait_for_event("client backend", "idle-session-timeout") + log_offset = node.current_log_position() + node.safe_psql("SELECT injection_points_wakeup('idle-session-timeout');") + node.wait_for_log( + r"""terminating connection due to idle-session timeout""", log_offset + ) + assert psql_session.quit() == 0, "" diff --git a/src/test/modules/test_misc/pyt/test_006_signal_autovacuum.py b/src/test/modules/test_misc/pyt/test_006_signal_autovacuum.py new file mode 100644 index 0000000000000..e849b9ae45f9c --- /dev/null +++ b/src/test/modules/test_misc/pyt/test_006_signal_autovacuum.py @@ -0,0 +1,114 @@ +# Copyright (c) 2024-2026, PostgreSQL Global Development Group + +"""Port of src/test/modules/test_misc/t/006_signal_autovacuum.pl. + +Test signaling autovacuum worker with pg_signal_autovacuum_worker. + +Only roles with privileges of pg_signal_autovacuum_worker are allowed to +signal autovacuum workers. This test uses an injection point located at the +beginning of the autovacuum worker startup. Skips without injection points. +""" + +import os +import re + +import pytest + + +def test_006_signal_autovacuum(create_pg): + """Only pg_signal_autovacuum_worker members may terminate av workers.""" + if os.environ.get("enable_injection_points") != "yes": + pytest.skip("Injection points not supported by this build") + + # Initialize postgres + node = create_pg("node", start=False) + + # This ensures a quick worker spawn. + node.append_conf("autovacuum_naptime = 1") + node.start() + + # Check if the extension injection_points is available, as it may be + # possible that this script is run with installcheck, where the module + # would not be installed by default. 
+ if not node.check_extension("injection_points"): + pytest.skip("Extension injection_points not installed") + + node.safe_psql("CREATE EXTENSION injection_points;") + + node.safe_psql( + """ + CREATE ROLE regress_regular_role; + CREATE ROLE regress_worker_role; + GRANT pg_signal_autovacuum_worker TO regress_worker_role; +""" + ) + + # From this point, autovacuum worker will wait at startup. + node.safe_psql("SELECT injection_points_attach('autovacuum-worker-start', 'wait');") + + # Accelerate worker creation in case we reach this point before the naptime + # ends. + node.reload() + + # Wait until an autovacuum worker starts. + node.wait_for_event("autovacuum worker", "autovacuum-worker-start") + + # And grab one of them. + av_pid = node.safe_psql( + """ + SELECT pid FROM pg_stat_activity WHERE backend_type = 'autovacuum worker' """ + """AND wait_event = 'autovacuum-worker-start' LIMIT 1; +""" + ) + + # Regular role cannot terminate autovacuum worker. + result = node.psql_capture( + """ + SET ROLE regress_regular_role; + SELECT pg_terminate_backend('{}'); +""".format( + av_pid + ), + on_error_stop=False, + ) + + assert _like( + result.stderr, + r"ERROR: permission denied to terminate process\nDETAIL: " + r'Only roles with privileges of the "pg_signal_autovacuum_worker" ' + r"role may terminate autovacuum workers\.", + ), "autovacuum worker not signaled with regular role" + + offset = node.current_log_position() + + # Role with pg_signal_autovacuum_worker can terminate autovacuum worker. + node.psql_capture( + """ + SET ROLE regress_worker_role; + SELECT pg_terminate_backend('{}'); +""".format( + av_pid + ), + on_error_stop=False, + ) + + # Wait for the autovacuum worker to exit before scanning the logs. + node.poll_query_until( + "SELECT count(*) = 0 FROM pg_stat_activity " + "WHERE pid = '{}' AND backend_type = 'autovacuum worker';".format(av_pid) + ) + + # Check that the primary server logs a FATAL indicating that autovacuum + # is terminated. 
+ assert node.log_matches( + r"FATAL: .*terminating autovacuum process due to administrator command", + offset, + ), "autovacuum worker signaled with pg_signal_autovacuum_worker granted" + + # Release injection point. + node.safe_psql("SELECT injection_points_detach('autovacuum-worker-start');") + + +def _like(text, pattern): + """Return True if pattern (a regex) matches text, mirroring Perl like().""" + return re.search(pattern, text) is not None diff --git a/src/test/modules/test_misc/pyt/test_007_catcache_inval.py b/src/test/modules/test_misc/pyt/test_007_catcache_inval.py new file mode 100644 index 0000000000000..df047e34c2c4a --- /dev/null +++ b/src/test/modules/test_misc/pyt/test_007_catcache_inval.py @@ -0,0 +1,51 @@ +# Copyright (c) 2022-2026, PostgreSQL Global Development Group + +# pylint: disable=line-too-long,too-many-statements +"""Port of src/test/modules/test_misc/t/007_catcache_inval.pl. + +Catalog-cache list invalidation race: with an injection point pausing a +catcache list miss mid systable scan, redefining a function concurrently must +not leave a stale catcache list entry. Skips without injection points. +Generated from the Perl original via .agent/gen_golden.py. 
+""" + +import pypg + +import os +import pytest + + +def test_007_catcache_inval(create_pg): + """Concurrent redefinition must not leave a stale catcache list entry.""" + if os.environ.get("enable_injection_points") != "yes": + pytest.skip("Injection points not supported by this build") + node = create_pg("node", start=False) + node.start() + if not node.check_extension("injection_points"): + pytest.skip("Extension injection_points not installed") + node.safe_psql("CREATE EXTENSION injection_points;") + # randStr(10000): a long random comment filler to enlarge the function body + longtext = pypg.rand_str(10000) + node.safe_psql( + "CREATE FUNCTION foofunc(dummy integer) RETURNS integer AS $$ SELECT 1; /* " + + longtext + + " */ $$ LANGUAGE SQL" + ) + psql_session = node.background_psql("postgres") + psql_session2 = node.background_psql("postgres") + psql_session.query( + "SELECT injection_points_set_local();\n SELECT injection_points_attach('catcache-list-miss-systable-scan-started', 'wait');" + ) + psql_session.query_until( + r"""starting_bg_psql""", + "\n \\echo starting_bg_psql\n SELECT foofunc(1);\n", + ) + node.safe_psql( + "CREATE FUNCTION foofunc() RETURNS integer AS $$ SELECT 123 $$ LANGUAGE SQL" + ) + psql_session2.query( + "SELECT injection_points_wakeup('catcache-list-miss-systable-scan-started');\n SELECT injection_points_detach('catcache-list-miss-systable-scan-started');" + ) + psql_session.query("SELECT foofunc();") + assert psql_session.quit() == 0, "" + assert psql_session2.quit() == 0, "" diff --git a/src/test/modules/test_misc/pyt/test_008_replslot_single_user.py b/src/test/modules/test_misc/pyt/test_008_replslot_single_user.py new file mode 100644 index 0000000000000..95d29a412a109 --- /dev/null +++ b/src/test/modules/test_misc/pyt/test_008_replslot_single_user.py @@ -0,0 +1,108 @@ +# Copyright (c) 2023-2026, PostgreSQL Global Development Group + +"""Port of src/test/modules/test_misc/t/008_replslot_single_user.pl. 
+ +Replication-slot operations work in single-user mode (postgres --single): +creating logical/physical/temporary slots, logical decoding, advancing, copying, +and dropping slots all succeed when run as single-user SQL against a stopped +cluster's data directory. +""" + +import os +import subprocess +import sys + +import pytest + + +def _single_mode(node, queries, test_name): + result = subprocess.run( + [ + os.path.join(str(node.bin_dir), "postgres"), + "--single", + "-F", + "-c", + "exit_on_error=true", + "-D", + str(node.datadir), + "postgres", + ], + input=queries, + encoding="utf-8", + stdout=subprocess.DEVNULL, + stderr=subprocess.DEVNULL, + env=node.connenv, + check=False, + ) + assert result.returncode == 0, test_name + + +@pytest.mark.skipif(sys.platform == "win32", reason="not supported on Windows") +def test_008_replslot_single_user(create_pg): + """Replication-slot operations succeed in single-user mode.""" + slot_logical = "slot_logical" + slot_physical = "slot_physical" + node = create_pg("node", allows_streaming="logical") + node.safe_psql("CREATE TABLE foo (id int)") + node.stop() + _single_mode( + node, + "SELECT pg_create_logical_replication_slot('{}', 'test_decoding')".format( + slot_logical + ), + "logical slot creation", + ) + _single_mode( + node, + "SELECT pg_create_physical_replication_slot('{}', true)".format(slot_physical), + "physical slot creation", + ) + _single_mode( + node, + "SELECT pg_create_physical_replication_slot('slot_tmp', true, true)", + "temporary physical slot creation", + ) + _single_mode( + node, + "INSERT INTO foo VALUES (1);\n" + "SELECT pg_logical_slot_get_changes('{}', NULL, NULL);\n".format(slot_logical), + "logical decoding", + ) + _single_mode( + node, + "SELECT pg_replication_slot_advance('{}', pg_current_wal_lsn())".format( + slot_logical + ), + "logical slot advance", + ) + _single_mode( + node, + "SELECT pg_replication_slot_advance('{}', pg_current_wal_lsn())".format( + slot_physical + ), + "physical slot 
advance", + ) + _single_mode( + node, + "SELECT pg_copy_logical_replication_slot('{}', 'slot_log_copy')".format( + slot_logical + ), + "logical slot copy", + ) + _single_mode( + node, + "SELECT pg_copy_physical_replication_slot('{}', 'slot_phy_copy')".format( + slot_physical + ), + "physical slot copy", + ) + _single_mode( + node, + "SELECT pg_drop_replication_slot('{}')".format(slot_logical), + "logical slot drop", + ) + _single_mode( + node, + "SELECT pg_drop_replication_slot('{}')".format(slot_physical), + "physical slot drop", + ) diff --git a/src/test/modules/test_misc/pyt/test_009_log_temp_files.py b/src/test/modules/test_misc/pyt/test_009_log_temp_files.py new file mode 100644 index 0000000000000..7b54389bf650d --- /dev/null +++ b/src/test/modules/test_misc/pyt/test_009_log_temp_files.py @@ -0,0 +1,102 @@ +# Copyright (c) 2024-2026, PostgreSQL Global Development Group + +# pylint: disable=line-too-long +"""Port of src/test/modules/test_misc/t/009_log_temp_files.pl. + +With log_temp_files=0 and tiny work_mem, a sort that spills to a temporary file +logs the temp-file removal attributed to the right statement across many +protocol/portal shapes: unnamed/named portals, extended-protocol bind without a +query, pipelined queries, simple queries, cursors (and WITH HOLD), and +prepare/execute. Bind-without-query cases log the temp file but no STATEMENT line. 
+""" + + +def _temp_then_statement(stmt): + return r"(?s)LOG:\s+temporary file: path.*\n.* STATEMENT:\s+" + stmt + + +def test_009_log_temp_files(create_pg): + """Temp-file logging is attributed to the right statement across portals.""" + node = create_pg("primary", start=False) + node.append_conf( + "\nwork_mem = 64kB\nlog_temp_files = 0\ndebug_parallel_query = off\n" + "log_error_verbosity = default\n" + ) + node.start() + node.safe_psql( + "CREATE UNLOGGED TABLE foo(a int);\n" + "INSERT INTO foo(a) SELECT * FROM generate_series(1, 5000);" + ) + off = node.current_log_position() + node.safe_psql( + "BEGIN;\nSELECT a FROM foo ORDER BY a OFFSET $1 \\bind 4990 \\g\n" + "SELECT 'unnamed portal';\nEND;" + ) + assert node.log_matches( + _temp_then_statement(r"SELECT 'unnamed portal'"), off + ), "unnamed portal" + off = node.current_log_position() + node.safe_psql("SELECT a FROM foo ORDER BY a OFFSET $1 \\bind 4991 \\g\n") + assert node.log_matches( + r"(?s)LOG:\s+temporary file:", off + ), "bind and implicit transaction, temporary file removed" + assert not node.log_matches( + r"(?s)STATEMENT:", off + ), "bind and implicit transaction, no statement logged" + node.safe_psql( + "BEGIN;\nSELECT a FROM foo ORDER BY a OFFSET $1 \\parse stmt\n" + "\\bind_named stmt 4999 \\g\nSELECT 'named portal';\nEND;" + ) + assert node.log_matches( + _temp_then_statement(r"SELECT 'named portal'"), off + ), "named portal" + off = node.current_log_position() + node.safe_psql( + "\\startpipeline\n" + "SELECT a FROM foo ORDER BY a OFFSET $1 \\bind 4992 \\sendpipeline\n" + "SELECT 'pipelined query';\n\\endpipeline\n" + ) + assert node.log_matches( + _temp_then_statement(r"SELECT 'pipelined query'"), off + ), "pipelined query" + off = node.current_log_position() + node.safe_psql( + "SELECT a, a, a FROM foo ORDER BY a OFFSET $1 \\parse p1\n" + "\\bind_named p1 4993 \\g\n" + ) + assert node.log_matches( + r"(?s)LOG:\s+temporary file:", off + ), "parse and bind, temporary file removed" + assert 
not node.log_matches( + r"(?s)STATEMENT:", off + ), "bind and bind, no statement logged" + off = node.current_log_position() + node.safe_psql("BEGIN;\nSELECT a FROM foo ORDER BY a OFFSET 4994;\nEND;") + assert node.log_matches( + _temp_then_statement(r"SELECT a FROM foo ORDER BY a OFFSET 4994;"), off + ), "simple query" + _cursor_and_prepare(node) + + +def _cursor_and_prepare(node): + off = node.current_log_position() + node.safe_psql( + "BEGIN;\nDECLARE mycur CURSOR FOR SELECT a FROM foo ORDER BY a OFFSET 4995;\n" + "FETCH 10 FROM mycur;\nSELECT 1;\nCLOSE mycur;\nEND;" + ) + assert node.log_matches(_temp_then_statement(r"CLOSE mycur;"), off), "cursor" + off = node.current_log_position() + node.safe_psql( + "BEGIN;\nDECLARE holdcur CURSOR WITH HOLD FOR SELECT a FROM foo ORDER BY a " + "OFFSET 4996;\nFETCH 10 FROM holdcur;\nCOMMIT;\nCLOSE holdcur;" + ) + assert node.log_matches(_temp_then_statement(r"COMMIT;"), off), "cursor WITH HOLD" + off = node.current_log_position() + node.safe_psql( + "BEGIN;\nPREPARE p1 AS SELECT a FROM foo ORDER BY a OFFSET 4997;\n" + "EXECUTE p1;\nDEALLOCATE p1;\nEND;" + ) + assert node.log_matches( + _temp_then_statement(r"EXECUTE p1;"), off + ), "prepare/execute" + node.stop("fast") diff --git a/src/test/modules/test_misc/pyt/test_010_index_concurrently_upsert.py b/src/test/modules/test_misc/pyt/test_010_index_concurrently_upsert.py new file mode 100644 index 0000000000000..2a4f1c3f2d346 --- /dev/null +++ b/src/test/modules/test_misc/pyt/test_010_index_concurrently_upsert.py @@ -0,0 +1,956 @@ +# Copyright (c) 2026, PostgreSQL Global Development Group + +# pylint: disable=line-too-long,too-many-statements +"""Port of src/test/modules/test_misc/t/010_index_concurrently_upsert.pl. + +Test INSERT ON CONFLICT DO UPDATE behavior concurrent with CREATE INDEX +CONCURRENTLY and REINDEX CONCURRENTLY. 
+ +These tests verify the fix for "duplicate key value violates unique +constraint" errors that occurred when infer_arbiter_indexes() only considered +indisvalid indexes, causing different transactions to use different arbiter +indexes. + +Skips without injection points. +""" + +import os +import re +import time + +import pytest + +import pypg + + +def _wait_for_injection_point(node, point_name, timeout=None): + """Wait for a session to hit an injection point. Returns True if found, + False if timeout. On timeout, logs diagnostic information about all active + queries (mirrors the Perl wait_for_injection_point sub).""" + if timeout is None: + timeout = pypg.test_timeout_default() / 2 + + for _ in range(int(timeout * 10)): + pid = node.safe_psql( + """ +\t\t\tSELECT pid FROM pg_stat_activity +\t\t\tWHERE wait_event_type = 'InjectionPoint' +\t\t\t AND wait_event = '{}' +\t\t\tLIMIT 1; +\t\t""".format( + point_name + ) + ) + if pid != "": + return True + time.sleep(0.1) + + # Timeout - report diagnostic information + activity = node.safe_psql( + """ +\t\tSELECT format('pid=%s, state=%s, wait_event_type=%s, wait_event=%s, backend_xmin=%s, backend_xid=%s, query=%s', +\t\t\tpid, state, wait_event_type, wait_event, backend_xmin, backend_xid, left(query, 100)) +\t\tFROM pg_stat_activity +\t\tORDER BY pid; +\t""" + ) + print( + "wait_for_injection_point timeout waiting for: {}\n" + "Current queries in pg_stat_activity:\n{}".format(point_name, activity) + ) + + return False + + +def _ok_injection_point(node, injection_point, testname=None): + """ok() a wait for the given injection point (mirrors ok_injection_point).""" + if testname is None: + testname = "hit injection point {}".format(injection_point) + assert _wait_for_injection_point(node, injection_point), testname + + +def _wait_for_idle(node, pid, timeout=None): + """Wait for a specific backend to become idle. 
Returns True if idle, + False if waiting for injection point or timeout (mirrors wait_for_idle).""" + if timeout is None: + timeout = pypg.test_timeout_default() / 2 + + for _ in range(int(timeout * 10)): + result = node.safe_psql( + "\n\t\t\tSELECT state, wait_event_type FROM pg_stat_activity " + "WHERE pid = {};\n\t\t".format(pid) + ) + state, _, wait_event_type = result.partition("|") + if state == "idle": + return True + if wait_event_type == "InjectionPoint": + return False + time.sleep(0.1) + return False + + +def _wakeup_injection_point(node, point_name): + """Detach and wakeup an injection point (mirrors wakeup_injection_point).""" + node.safe_psql( + """ +SELECT injection_points_detach('{point}'); +SELECT injection_points_wakeup('{point}'); +""".format( + point=point_name + ) + ) + + +def _safe_quit(session): + """Wait for any pending query to complete, capture stderr, and close the + session. Returns the stderr output (excluding internal markers). Mirrors + the Perl safe_quit sub.""" + banner = "safe_quit_marker" + banner_match = re.compile(r"(^|\n)" + banner + r"\r?\n") + + session.send("\\echo {b}\n\\warn {b}\n".format(b=banner)) + + # Send a marker and wait for it to ensure any pending query completes. 
+ session.query_until(banner_match.pattern) + deadline = time.monotonic() + pypg.test_timeout_default() + while not banner_match.search(session.stderr): + if time.monotonic() > deadline: + raise TimeoutError("safe_quit timed out waiting for banner on stderr") + time.sleep(0.02) + + # Capture stderr (excluding the banner) + stderr = banner_match.sub("", session.stderr) + + # Close the session + session.quit() + + return stderr + + +def _clean_safe_quit_ok(*sessions): + """Verify that the given sessions exit cleanly (mirrors clean_safe_quit_ok).""" + for i, session in enumerate(sessions, start=1): + assert _safe_quit(session) == "", "session {} quit cleanly".format(i) + + +def test_010_index_concurrently_upsert(create_pg): + """UPSERT concurrent with (RE)INDEX CONCURRENTLY across many permutations.""" + if os.environ.get("enable_injection_points") != "yes": + pytest.skip("Injection points not supported by this build") + + # Node initialization + node = create_pg("node", start=False) + node.start() + + # Check if the extension injection_points is available + if not node.check_extension("injection_points"): + pytest.skip("Extension injection_points not installed") + + node.safe_psql("CREATE EXTENSION injection_points;") + + node.safe_psql( + """ +CREATE SCHEMA test; +CREATE UNLOGGED TABLE test.tblpk (i int PRIMARY KEY, updated_at timestamp); +ALTER TABLE test.tblpk SET (parallel_workers=0); + +CREATE TABLE test.tblparted(i int primary key, updated_at timestamp) PARTITION BY RANGE (i); +CREATE TABLE test.tbl_partition PARTITION OF test.tblparted + FOR VALUES FROM (0) TO (10000) + WITH (parallel_workers = 0); + +CREATE UNLOGGED TABLE test.tblexpr(i int, updated_at timestamp); +CREATE UNIQUE INDEX tbl_pkey_special ON test.tblexpr(abs(i)) WHERE i < 1000; +ALTER TABLE test.tblexpr SET (parallel_workers=0); + +""" + ) + + ########################################################################## + # Test: REINDEX CONCURRENTLY + UPSERT (wakeup at set-dead phase) + + # Create 
sessions with on_error_stop=False so psql doesn't exit on SQL + # errors. This allows us to collect stderr and detect errors after the + # test completes. + s1 = node.background_psql("postgres", on_error_stop=False) + s2 = node.background_psql("postgres", on_error_stop=False) + s3 = node.background_psql("postgres", on_error_stop=False) + + # Setup injection points for each session + s1.query_safe( + """ +SELECT injection_points_set_local(); +SELECT injection_points_attach('check-exclusion-or-unique-constraint-no-conflict', 'wait'); +""" + ) + + s2.query_safe( + """ +SELECT injection_points_set_local(); +SELECT injection_points_attach('exec-insert-before-insert-speculative', 'wait'); +""" + ) + + s3.query_safe( + """ +SELECT injection_points_set_local(); +SELECT injection_points_attach('reindex-relation-concurrently-before-set-dead', 'wait'); +""" + ) + + # s3 starts REINDEX (will block on reindex-relation-concurrently-before-set-dead) + s3.query_until( + r"starting_reindex", + "\n\\echo starting_reindex\nREINDEX INDEX CONCURRENTLY test.tblpk_pkey;\n", + ) + + # Wait for s3 to hit injection point + _ok_injection_point(node, "reindex-relation-concurrently-before-set-dead") + + # s1 starts UPSERT (will block on check-exclusion-or-unique-constraint-no-conflict) + s1.query_until( + r"starting_upsert_s1", + "\n\\echo starting_upsert_s1\n" + "INSERT INTO test.tblpk VALUES (13,now()) ON CONFLICT (i) DO UPDATE SET updated_at = now();\n", + ) + + # Wait for s1 to hit injection point + _ok_injection_point(node, "check-exclusion-or-unique-constraint-no-conflict") + + # Wakeup s3 to continue (reindex-relation-concurrently-before-set-dead) + _wakeup_injection_point(node, "reindex-relation-concurrently-before-set-dead") + + # s2 starts UPSERT (will block on exec-insert-before-insert-speculative) + s2.query_until( + r"starting_upsert_s2", + "\n\\echo starting_upsert_s2\n" + "INSERT INTO test.tblpk VALUES (13,now()) ON CONFLICT (i) DO UPDATE SET updated_at = now();\n", + ) + + # 
Wait for s2 to hit injection point + _ok_injection_point(node, "exec-insert-before-insert-speculative") + + # Wakeup s1 (check-exclusion-or-unique-constraint-no-conflict) + _wakeup_injection_point(node, "check-exclusion-or-unique-constraint-no-conflict") + + # Wakeup s2 (exec-insert-before-insert-speculative) + _wakeup_injection_point(node, "exec-insert-before-insert-speculative") + + _clean_safe_quit_ok(s1, s2, s3) + + # Cleanup test 1 + node.safe_psql("TRUNCATE TABLE test.tblpk") + + ########################################################################## + # Test: REINDEX CONCURRENTLY + UPSERT (wakeup at swap phase) + + s1 = node.background_psql("postgres", on_error_stop=False) + s2 = node.background_psql("postgres", on_error_stop=False) + s3 = node.background_psql("postgres", on_error_stop=False) + + s1.query_safe( + """ +SELECT injection_points_set_local(); +SELECT injection_points_attach('check-exclusion-or-unique-constraint-no-conflict', 'wait'); +""" + ) + + s2.query_safe( + """ +SELECT injection_points_set_local(); +SELECT injection_points_attach('exec-insert-before-insert-speculative', 'wait'); +""" + ) + + s3.query_safe( + """ +SELECT injection_points_set_local(); +SELECT injection_points_attach('reindex-relation-concurrently-before-swap', 'wait'); +""" + ) + + s3.query_until( + r"starting_reindex", + "\n\\echo starting_reindex\nREINDEX INDEX CONCURRENTLY test.tblpk_pkey;\n", + ) + + _ok_injection_point(node, "reindex-relation-concurrently-before-swap") + + s1.query_until( + r"starting_upsert_s1", + "\n\\echo starting_upsert_s1\n" + "INSERT INTO test.tblpk VALUES (13,now()) ON CONFLICT (i) DO UPDATE SET updated_at = now();\n", + ) + + _ok_injection_point(node, "check-exclusion-or-unique-constraint-no-conflict") + + _wakeup_injection_point(node, "reindex-relation-concurrently-before-swap") + + s2.query_until( + r"starting_upsert_s2", + "\n\\echo starting_upsert_s2\n" + "INSERT INTO test.tblpk VALUES (13,now()) ON CONFLICT (i) DO UPDATE SET updated_at = 
now();\n", + ) + + _ok_injection_point(node, "exec-insert-before-insert-speculative") + + _wakeup_injection_point(node, "exec-insert-before-insert-speculative") + _wakeup_injection_point(node, "check-exclusion-or-unique-constraint-no-conflict") + + _clean_safe_quit_ok(s1, s2, s3) + + node.safe_psql("TRUNCATE TABLE test.tblpk") + + ########################################################################## + # Test: REINDEX CONCURRENTLY + UPSERT (s1 wakes before reindex) + + s1 = node.background_psql("postgres", on_error_stop=False) + s2 = node.background_psql("postgres", on_error_stop=False) + s3 = node.background_psql("postgres", on_error_stop=False) + + s1.query_safe( + """ +SELECT injection_points_set_local(); +SELECT injection_points_attach('check-exclusion-or-unique-constraint-no-conflict', 'wait'); +""" + ) + + s2.query_safe( + """ +SELECT injection_points_set_local(); +SELECT injection_points_attach('exec-insert-before-insert-speculative', 'wait'); +""" + ) + + s3.query_safe( + """ +SELECT injection_points_set_local(); +SELECT injection_points_attach('reindex-relation-concurrently-before-set-dead', 'wait'); +""" + ) + + s3.query_until( + r"starting_reindex", + "\n\\echo starting_reindex\nREINDEX INDEX CONCURRENTLY test.tblpk_pkey;\n", + ) + + _ok_injection_point(node, "reindex-relation-concurrently-before-set-dead") + + s1.query_until( + r"starting_upsert_s1", + "\n\\echo starting_upsert_s1\n" + "INSERT INTO test.tblpk VALUES (13,now()) ON CONFLICT (i) DO UPDATE SET updated_at = now();\n", + ) + + _ok_injection_point(node, "check-exclusion-or-unique-constraint-no-conflict") + + # Start s2 BEFORE waking reindex (key difference from permutation 1) + s2.query_until( + r"starting_upsert_s2", + "\n\\echo starting_upsert_s2\n" + "INSERT INTO test.tblpk VALUES (13,now()) ON CONFLICT (i) DO UPDATE SET updated_at = now();\n", + ) + + _ok_injection_point(node, "exec-insert-before-insert-speculative") + + # Wake s1 first, then reindex, then s2 + 
_wakeup_injection_point(node, "check-exclusion-or-unique-constraint-no-conflict") + _wakeup_injection_point(node, "reindex-relation-concurrently-before-set-dead") + _wakeup_injection_point(node, "exec-insert-before-insert-speculative") + + _clean_safe_quit_ok(s1, s2, s3) + + node.safe_psql("TRUNCATE TABLE test.tblpk") + + ########################################################################## + # Test: REINDEX + UPSERT ON CONSTRAINT (set-dead phase) + + s1 = node.background_psql("postgres", on_error_stop=False) + s2 = node.background_psql("postgres", on_error_stop=False) + s3 = node.background_psql("postgres", on_error_stop=False) + + s1.query_safe( + """ +SELECT injection_points_set_local(); +SELECT injection_points_attach('check-exclusion-or-unique-constraint-no-conflict', 'wait'); +""" + ) + + s2.query_safe( + """ +SELECT injection_points_set_local(); +SELECT injection_points_attach('exec-insert-before-insert-speculative', 'wait'); +""" + ) + + s3.query_safe( + """ +SELECT injection_points_set_local(); +SELECT injection_points_attach('reindex-relation-concurrently-before-set-dead', 'wait'); +""" + ) + + s3.query_until( + r"starting_reindex", + "\n\\echo starting_reindex\nREINDEX INDEX CONCURRENTLY test.tblpk_pkey;\n", + ) + + _ok_injection_point(node, "reindex-relation-concurrently-before-set-dead") + + s1.query_until( + r"starting_upsert_s1", + "\n\\echo starting_upsert_s1\n" + "INSERT INTO test.tblpk VALUES (13, now()) ON CONFLICT ON CONSTRAINT tblpk_pkey DO UPDATE SET updated_at = now();\n", + ) + + _ok_injection_point(node, "check-exclusion-or-unique-constraint-no-conflict") + + _wakeup_injection_point(node, "reindex-relation-concurrently-before-set-dead") + + s2.query_until( + r"starting_upsert_s2", + "\n\\echo starting_upsert_s2\n" + "INSERT INTO test.tblpk VALUES (13, now()) ON CONFLICT ON CONSTRAINT tblpk_pkey DO UPDATE SET updated_at = now();\n", + ) + + _ok_injection_point(node, "exec-insert-before-insert-speculative") + + 
_wakeup_injection_point(node, "check-exclusion-or-unique-constraint-no-conflict") + _wakeup_injection_point(node, "exec-insert-before-insert-speculative") + + _clean_safe_quit_ok(s1, s2, s3) + + node.safe_psql("TRUNCATE TABLE test.tblpk") + + ########################################################################## + # Test: REINDEX + UPSERT ON CONSTRAINT (swap phase) + + s1 = node.background_psql("postgres", on_error_stop=False) + s2 = node.background_psql("postgres", on_error_stop=False) + s3 = node.background_psql("postgres", on_error_stop=False) + + s1.query_safe( + """ +SELECT injection_points_set_local(); +SELECT injection_points_attach('check-exclusion-or-unique-constraint-no-conflict', 'wait'); +""" + ) + + s2.query_safe( + """ +SELECT injection_points_set_local(); +SELECT injection_points_attach('exec-insert-before-insert-speculative', 'wait'); +""" + ) + + s3.query_safe( + """ +SELECT injection_points_set_local(); +SELECT injection_points_attach('reindex-relation-concurrently-before-swap', 'wait'); +""" + ) + + s3.query_until( + r"starting_reindex", + "\n\\echo starting_reindex\nREINDEX INDEX CONCURRENTLY test.tblpk_pkey;\n", + ) + + _ok_injection_point(node, "reindex-relation-concurrently-before-swap") + + s1.query_until( + r"starting_upsert_s1", + "\n\\echo starting_upsert_s1\n" + "INSERT INTO test.tblpk VALUES (13, now()) ON CONFLICT ON CONSTRAINT tblpk_pkey DO UPDATE SET updated_at = now();\n", + ) + + _ok_injection_point(node, "check-exclusion-or-unique-constraint-no-conflict") + + _wakeup_injection_point(node, "reindex-relation-concurrently-before-swap") + + s2.query_until( + r"starting_upsert_s2", + "\n\\echo starting_upsert_s2\n" + "INSERT INTO test.tblpk VALUES (13, now()) ON CONFLICT ON CONSTRAINT tblpk_pkey DO UPDATE SET updated_at = now();\n", + ) + + _ok_injection_point(node, "exec-insert-before-insert-speculative") + + _wakeup_injection_point(node, "exec-insert-before-insert-speculative") + _wakeup_injection_point(node, 
"check-exclusion-or-unique-constraint-no-conflict") + + _clean_safe_quit_ok(s1, s2, s3) + + node.safe_psql("TRUNCATE TABLE test.tblpk") + + ########################################################################## + # Test: REINDEX + UPSERT ON CONSTRAINT (s1 wakes before reindex) + + s1 = node.background_psql("postgres", on_error_stop=False) + s2 = node.background_psql("postgres", on_error_stop=False) + s3 = node.background_psql("postgres", on_error_stop=False) + + s1.query_safe( + """ +SELECT injection_points_set_local(); +SELECT injection_points_attach('check-exclusion-or-unique-constraint-no-conflict', 'wait'); +""" + ) + + s2.query_safe( + """ +SELECT injection_points_set_local(); +SELECT injection_points_attach('exec-insert-before-insert-speculative', 'wait'); +""" + ) + + s3.query_safe( + """ +SELECT injection_points_set_local(); +SELECT injection_points_attach('reindex-relation-concurrently-before-set-dead', 'wait'); +""" + ) + + s3.query_until( + r"starting_reindex", + "\n\\echo starting_reindex\nREINDEX INDEX CONCURRENTLY test.tblpk_pkey;\n", + ) + + _ok_injection_point(node, "reindex-relation-concurrently-before-set-dead") + + s1.query_until( + r"starting_upsert_s1", + "\n\\echo starting_upsert_s1\n" + "INSERT INTO test.tblpk VALUES (13, now()) ON CONFLICT ON CONSTRAINT tblpk_pkey DO UPDATE SET updated_at = now();\n", + ) + + _ok_injection_point(node, "check-exclusion-or-unique-constraint-no-conflict") + + # Start s2 BEFORE waking reindex + s2.query_until( + r"starting_upsert_s2", + "\n\\echo starting_upsert_s2\n" + "INSERT INTO test.tblpk VALUES (13, now()) ON CONFLICT ON CONSTRAINT tblpk_pkey DO UPDATE SET updated_at = now();\n", + ) + + _ok_injection_point(node, "exec-insert-before-insert-speculative") + + # Wake s1 first, then reindex, then s2 + _wakeup_injection_point(node, "check-exclusion-or-unique-constraint-no-conflict") + _wakeup_injection_point(node, "reindex-relation-concurrently-before-set-dead") + _wakeup_injection_point(node, 
"exec-insert-before-insert-speculative") + + _clean_safe_quit_ok(s1, s2, s3) + + node.safe_psql("TRUNCATE TABLE test.tblpk") + + ########################################################################## + # Test: REINDEX on partitioned table (set-dead phase) + + s1 = node.background_psql("postgres", on_error_stop=False) + s2 = node.background_psql("postgres", on_error_stop=False) + s3 = node.background_psql("postgres", on_error_stop=False) + + s1.query_safe( + """ +SELECT injection_points_set_local(); +SELECT injection_points_attach('check-exclusion-or-unique-constraint-no-conflict', 'wait'); +""" + ) + + s2.query_safe( + """ +SELECT injection_points_set_local(); +SELECT injection_points_attach('exec-insert-before-insert-speculative', 'wait'); +""" + ) + + s3.query_safe( + """ +SELECT injection_points_set_local(); +SELECT injection_points_attach('reindex-relation-concurrently-before-set-dead', 'wait'); +""" + ) + + s3.query_until( + r"starting_reindex", + "\n\\echo starting_reindex\nREINDEX INDEX CONCURRENTLY test.tbl_partition_pkey;\n", + ) + + _ok_injection_point(node, "reindex-relation-concurrently-before-set-dead") + + s1.query_until( + r"starting_upsert_s1", + "\n\\echo starting_upsert_s1\n" + "INSERT INTO test.tblparted VALUES (13, now()) ON CONFLICT (i) DO UPDATE SET updated_at = now();\n", + ) + + _ok_injection_point(node, "check-exclusion-or-unique-constraint-no-conflict") + + _wakeup_injection_point(node, "reindex-relation-concurrently-before-set-dead") + + s2.query_until( + r"starting_upsert_s2", + "\n\\echo starting_upsert_s2\n" + "INSERT INTO test.tblparted VALUES (13, now()) ON CONFLICT (i) DO UPDATE SET updated_at = now();\n", + ) + + _ok_injection_point(node, "exec-insert-before-insert-speculative") + + _wakeup_injection_point(node, "check-exclusion-or-unique-constraint-no-conflict") + _wakeup_injection_point(node, "exec-insert-before-insert-speculative") + + _clean_safe_quit_ok(s1, s2, s3) + + node.safe_psql("TRUNCATE TABLE test.tblparted") + + 
########################################################################## + # Test: REINDEX on partitioned table (swap phase) + + s1 = node.background_psql("postgres", on_error_stop=False) + s2 = node.background_psql("postgres", on_error_stop=False) + s3 = node.background_psql("postgres", on_error_stop=False) + + s1.query_safe( + """ +SELECT injection_points_set_local(); +SELECT injection_points_attach('check-exclusion-or-unique-constraint-no-conflict', 'wait'); +""" + ) + + s2.query_safe( + """ +SELECT injection_points_set_local(); +SELECT injection_points_attach('exec-insert-before-insert-speculative', 'wait'); +""" + ) + + s3.query_safe( + """ +SELECT injection_points_set_local(); +SELECT injection_points_attach('reindex-relation-concurrently-before-swap', 'wait'); +""" + ) + + s3.query_until( + r"starting_reindex", + "\n\\echo starting_reindex\nREINDEX INDEX CONCURRENTLY test.tbl_partition_pkey;\n", + ) + + _ok_injection_point(node, "reindex-relation-concurrently-before-swap") + + s1.query_until( + r"starting_upsert_s1", + "\n\\echo starting_upsert_s1\n" + "INSERT INTO test.tblparted VALUES (13, now()) ON CONFLICT (i) DO UPDATE SET updated_at = now();\n", + ) + + _ok_injection_point(node, "check-exclusion-or-unique-constraint-no-conflict") + + _wakeup_injection_point(node, "reindex-relation-concurrently-before-swap") + + s2.query_until( + r"starting_upsert_s2", + "\n\\echo starting_upsert_s2\n" + "INSERT INTO test.tblparted VALUES (13, now()) ON CONFLICT (i) DO UPDATE SET updated_at = now();\n", + ) + + _ok_injection_point(node, "exec-insert-before-insert-speculative") + + _wakeup_injection_point(node, "exec-insert-before-insert-speculative") + _wakeup_injection_point(node, "check-exclusion-or-unique-constraint-no-conflict") + + _clean_safe_quit_ok(s1, s2, s3) + + node.safe_psql("TRUNCATE TABLE test.tblparted") + + ########################################################################## + # Test: REINDEX on partitioned table (s1 wakes before reindex) + + s1 
= node.background_psql("postgres", on_error_stop=False) + s2 = node.background_psql("postgres", on_error_stop=False) + s3 = node.background_psql("postgres", on_error_stop=False) + + s1.query_safe( + """ +SELECT injection_points_set_local(); +SELECT injection_points_attach('check-exclusion-or-unique-constraint-no-conflict', 'wait'); +""" + ) + + s2.query_safe( + """ +SELECT injection_points_set_local(); +SELECT injection_points_attach('exec-insert-before-insert-speculative', 'wait'); +""" + ) + + s3.query_safe( + """ +SELECT injection_points_set_local(); +SELECT injection_points_attach('reindex-relation-concurrently-before-set-dead', 'wait'); +""" + ) + + s3.query_until( + r"starting_reindex", + "\n\\echo starting_reindex\nREINDEX INDEX CONCURRENTLY test.tbl_partition_pkey;\n", + ) + + _ok_injection_point(node, "reindex-relation-concurrently-before-set-dead") + + s1.query_until( + r"starting_upsert_s1", + "\n\\echo starting_upsert_s1\n" + "INSERT INTO test.tblparted VALUES (13, now()) ON CONFLICT (i) DO UPDATE SET updated_at = now();\n", + ) + + _ok_injection_point(node, "check-exclusion-or-unique-constraint-no-conflict") + + # Start s2 BEFORE waking reindex + s2.query_until( + r"starting_upsert_s2", + "\n\\echo starting_upsert_s2\n" + "INSERT INTO test.tblparted VALUES (13, now()) ON CONFLICT (i) DO UPDATE SET updated_at = now();\n", + ) + + _ok_injection_point(node, "exec-insert-before-insert-speculative") + + # Wake s1 first, then reindex, then s2 + _wakeup_injection_point(node, "check-exclusion-or-unique-constraint-no-conflict") + _wakeup_injection_point(node, "reindex-relation-concurrently-before-set-dead") + _wakeup_injection_point(node, "exec-insert-before-insert-speculative") + + _clean_safe_quit_ok(s1, s2, s3) + + node.safe_psql("TRUNCATE TABLE test.tblparted") + + ########################################################################## + # Test: REINDEX on partitioned table, cache inval between two + # get_partition_ancestors + + s1 = 
node.background_psql("postgres", on_error_stop=False) + s2 = node.background_psql("postgres", on_error_stop=False) + s3 = node.background_psql("postgres", on_error_stop=False) + + s1.query_safe( + """ +SELECT injection_points_set_local(); +SELECT injection_points_attach('exec-init-partition-after-get-partition-ancestors', 'wait'); +""" + ) + + s2.query_safe( + """ +SELECT injection_points_set_local(); +SELECT injection_points_attach('reindex-relation-concurrently-before-swap', 'wait'); +""" + ) + + s2.query_until( + r"starting_reindex", + "\n\\echo starting_reindex\nREINDEX INDEX CONCURRENTLY test.tbl_partition_pkey;\n", + ) + + _ok_injection_point(node, "reindex-relation-concurrently-before-swap") + + s1.query_until( + r"starting_upsert_s1", + "\n\\echo starting_upsert_s1\n" + "INSERT INTO test.tblparted VALUES (13, now()) ON CONFLICT (i) DO UPDATE SET updated_at = now();\n", + ) + + _ok_injection_point(node, "exec-init-partition-after-get-partition-ancestors") + + _wakeup_injection_point(node, "reindex-relation-concurrently-before-swap") + + _wakeup_injection_point(node, "exec-init-partition-after-get-partition-ancestors") + + _clean_safe_quit_ok(s1, s2, s3) + + node.safe_psql("TRUNCATE TABLE test.tblparted") + + ########################################################################## + # Test: CREATE INDEX CONCURRENTLY + UPSERT + # Uses invalidate-catalog-snapshot-end to test catalog invalidation + # during UPSERT + + s1 = node.background_psql("postgres", on_error_stop=False) + s2 = node.background_psql("postgres", on_error_stop=False) + s3 = node.background_psql("postgres", on_error_stop=False) + + s1_pid = s1.query_safe("SELECT pg_backend_pid()") + + # s1 attaches BOTH injection points - the unique constraint check AND + # catalog snapshot + s1.query_safe( + """ +SELECT injection_points_set_local(); +SELECT injection_points_attach('check-exclusion-or-unique-constraint-no-conflict', 'wait'); +""" + ) + + s1.query_until( + r"attaching_injection_point", + 
"\n\\echo attaching_injection_point\n" + "SELECT injection_points_attach('invalidate-catalog-snapshot-end', 'wait');\n", + ) + + # In cases of cache clobbering, s1 may hit the injection point during + # attach. Wait for that session to become idle (attach completed), or + # wake it up if it becomes stuck on injection point. + if not _wait_for_idle(node, s1_pid): + _ok_injection_point( + node, + "invalidate-catalog-snapshot-end", + "s1 hit injection point during attach (cache clobbering mode)", + ) + node.safe_psql( + "\n\t\tSELECT injection_points_wakeup('invalidate-catalog-snapshot-end');\n\t" + ) + + s2.query_safe( + """ +SELECT injection_points_set_local(); +SELECT injection_points_attach('exec-insert-before-insert-speculative', 'wait'); +""" + ) + + s3.query_safe( + """ +SELECT injection_points_set_local(); +SELECT injection_points_attach('define-index-before-set-valid', 'wait'); +""" + ) + + # s3: Start CREATE INDEX CONCURRENTLY (blocks on define-index-before-set-valid) + s3.query_until( + r"starting_create_index", + "\n\\echo starting_create_index\n" + "CREATE UNIQUE INDEX CONCURRENTLY tbl_pkey_duplicate ON test.tblpk(i);\n", + ) + + _ok_injection_point(node, "define-index-before-set-valid") + + # s1: Start UPSERT (blocks on invalidate-catalog-snapshot-end) + s1.query_until( + r"starting_upsert_s1", + "\n\\echo starting_upsert_s1\n" + "INSERT INTO test.tblpk VALUES (13,now()) ON CONFLICT (i) DO UPDATE SET updated_at = now();\n", + ) + + _ok_injection_point(node, "invalidate-catalog-snapshot-end") + + # Wakeup s3 (CREATE INDEX continues, triggers catalog invalidation) + _wakeup_injection_point(node, "define-index-before-set-valid") + + # s2: Start UPSERT (blocks on exec-insert-before-insert-speculative) + s2.query_until( + r"starting_upsert_s2", + "\n\\echo starting_upsert_s2\n" + "INSERT INTO test.tblpk VALUES (13,now()) ON CONFLICT (i) DO UPDATE SET updated_at = now();\n", + ) + + _ok_injection_point(node, "exec-insert-before-insert-speculative") + + 
_wakeup_injection_point(node, "invalidate-catalog-snapshot-end") + + _ok_injection_point(node, "check-exclusion-or-unique-constraint-no-conflict") + + _wakeup_injection_point(node, "exec-insert-before-insert-speculative") + + _wakeup_injection_point(node, "check-exclusion-or-unique-constraint-no-conflict") + + _clean_safe_quit_ok(s1, s2, s3) + + node.safe_psql("TRUNCATE TABLE test.tblparted") + + ########################################################################## + # Test: CREATE INDEX CONCURRENTLY on partial index + UPSERT + # Uses invalidate-catalog-snapshot-end to test catalog invalidation during + # UPSERT + + s1 = node.background_psql("postgres", on_error_stop=False) + s2 = node.background_psql("postgres", on_error_stop=False) + s3 = node.background_psql("postgres", on_error_stop=False) + + s1_pid = s1.query_safe("SELECT pg_backend_pid()") + + # s1 attaches BOTH injection points - the unique constraint check AND + # catalog snapshot + s1.query_safe( + """ +SELECT injection_points_set_local(); +SELECT injection_points_attach('check-exclusion-or-unique-constraint-no-conflict', 'wait'); +""" + ) + + s1.query_until( + r"attaching_injection_point", + "\n\\echo attaching_injection_point\n" + "SELECT injection_points_attach('invalidate-catalog-snapshot-end', 'wait');\n", + ) + + # In cases of cache clobbering, s1 may hit the injection point during + # attach. Wait for that session to become idle (attach completed), or + # wake it up if it becomes stuck on injection point. 
+ if not _wait_for_idle(node, s1_pid): + _ok_injection_point( + node, + "invalidate-catalog-snapshot-end", + "s1 hit injection point during attach (cache clobbering mode)", + ) + node.safe_psql( + "\n\t\tSELECT injection_points_wakeup('invalidate-catalog-snapshot-end');\n\t" + ) + + s2.query_safe( + """ +SELECT injection_points_set_local(); +SELECT injection_points_attach('exec-insert-before-insert-speculative', 'wait'); +""" + ) + + s3.query_safe( + """ +SELECT injection_points_set_local(); +SELECT injection_points_attach('define-index-before-set-valid', 'wait'); +""" + ) + + # s3: Start CREATE INDEX CONCURRENTLY (blocks on define-index-before-set-valid) + s3.query_until( + r"starting_create_index", + "\n\\echo starting_create_index\n" + "CREATE UNIQUE INDEX CONCURRENTLY tbl_pkey_special_duplicate ON test.tblexpr(abs(i)) WHERE i < 10000;\n", + ) + + _ok_injection_point(node, "define-index-before-set-valid") + + # s1: Start UPSERT (blocks on invalidate-catalog-snapshot-end) + s1.query_until( + r"starting_upsert_s1", + "\n\\echo starting_upsert_s1\n" + "INSERT INTO test.tblexpr VALUES(13,now()) ON CONFLICT (abs(i)) WHERE i < 100 DO UPDATE SET updated_at = now();\n", + ) + + _ok_injection_point(node, "invalidate-catalog-snapshot-end") + + # Wakeup s3 (CREATE INDEX continues, triggers catalog invalidation) + _wakeup_injection_point(node, "define-index-before-set-valid") + + # s2: Start UPSERT (blocks on exec-insert-before-insert-speculative) + s2.query_until( + r"starting_upsert_s2", + "\n\\echo starting_upsert_s2\n" + "INSERT INTO test.tblexpr VALUES(13,now()) ON CONFLICT (abs(i)) WHERE i < 100 DO UPDATE SET updated_at = now();\n", + ) + + _ok_injection_point(node, "exec-insert-before-insert-speculative") + _wakeup_injection_point(node, "invalidate-catalog-snapshot-end") + _ok_injection_point(node, "check-exclusion-or-unique-constraint-no-conflict") + _wakeup_injection_point(node, "exec-insert-before-insert-speculative") + _wakeup_injection_point(node, 
"check-exclusion-or-unique-constraint-no-conflict") + + _clean_safe_quit_ok(s1, s2, s3) + + node.safe_psql("TRUNCATE TABLE test.tblexpr") diff --git a/src/test/modules/test_misc/pyt/test_011_lock_stats.py b/src/test/modules/test_misc/pyt/test_011_lock_stats.py new file mode 100644 index 0000000000000..f8480a0b8cbae --- /dev/null +++ b/src/test/modules/test_misc/pyt/test_011_lock_stats.py @@ -0,0 +1,329 @@ +# Copyright (c) 2026, PostgreSQL Global Development Group + +# pylint: disable=line-too-long,too-many-statements +"""Port of src/test/modules/test_misc/t/011_lock_stats.pl. + +Test for the lock statistics and log_lock_waits. + +This test creates multiple locking situations when a session (s2) has to wait +on a lock for longer than deadlock_timeout. The first tests each test a +dedicated lock type. The last one checks that log_lock_waits has no impact on +the statistics counters. + +This test also checks that log_lock_waits messages are emitted both when a wait +occurs and when the lock is acquired, and that the "still waiting for" message +is logged exactly once per wait, even if the backend wakes due to signals. + +Skips without injection points. 
+""" + +import os +import re + +import pytest + +import pypg + +_DEADLOCK_TIMEOUT = 10 + + +def _setup_sessions(node): + """Set up the 2 sessions (mirrors the Perl setup_sessions sub).""" + s1 = node.background_psql("postgres") + s2 = node.background_psql("postgres") + + # Setup injection points for the waiting session + s2.query_until( + r"attaching_injection_point", + "\n\t\t\t\\echo attaching_injection_point\n" + "\t\t\tSELECT injection_points_attach('deadlock-timeout-fired', 'wait');\n", + ) + return s1, s2 + + +def _wait_for_pg_stat_lock(node, lock_type): + """Fetch waits and wait_time from pg_stat_lock for a given lock type until + they reach expected values: at least one wait and waiting longer than the + deadlock_timeout.""" + assert node.poll_query_until( + """ +\t\tSELECT waits > 0 AND wait_time >= {timeout} +\t\tFROM pg_stat_lock +\t\tWHERE locktype = '{lock_type}'; +\t""".format( + timeout=_DEADLOCK_TIMEOUT, lock_type=lock_type + ) + ), "Timed out waiting for pg_stat_lock for {}".format(lock_type) + + +def _wait_and_detach(node, point_name): + """Wait for a point, then detach it (mirrors Perl wait_and_detach).""" + node.wait_for_event("client backend", point_name) + node.safe_psql( + """ +SELECT injection_points_detach('{point}'); +SELECT injection_points_wakeup('{point}'); +""".format( + point=point_name + ) + ) + + +def test_011_lock_stats(create_pg): + """Lock statistics and log_lock_waits behavior across lock types.""" + if os.environ.get("enable_injection_points") != "yes": + pytest.skip("Injection points not supported by this build") + + # Node initialization + node = create_pg("node", start=False) + node.append_conf("deadlock_timeout = {}ms".format(_DEADLOCK_TIMEOUT)) + node.start() + + # Check if the extension injection_points is available + if not node.check_extension("injection_points"): + pytest.skip("Extension injection_points not installed") + + node.safe_psql("CREATE EXTENSION injection_points;") + + node.safe_psql( + """ +CREATE TABLE 
test_stat_tab(key text not null, value int); +INSERT INTO test_stat_tab(key, value) VALUES('k0', 1); +""" + ) + + ########################################################################## + + # ###### Relation lock + + s1, s2 = _setup_sessions(node) + + log_offset = node.current_log_position() + + s1.query_safe( + """ +SELECT pg_stat_reset_shared('lock'); +BEGIN; +LOCK TABLE test_stat_tab; +""" + ) + + # s2 setup + s2.query_safe( + """ +BEGIN; +SELECT pg_stat_force_next_flush(); +""" + ) + # s2 blocks on LOCK. + s2.query_until( + r"lock_s2", + "\n\\echo lock_s2\nLOCK TABLE test_stat_tab;\n", + ) + + _wait_and_detach(node, "deadlock-timeout-fired") + + # Check that log_lock_waits message is emitted during a lock wait. + node.wait_for_log(r"still waiting for AccessExclusiveLock on relation", log_offset) + + # Wake the backend waiting on the lock and confirm it woke by calling + # pg_log_backend_memory_contexts() and checking for the logged memory + # contexts. This is necessary to test later that the "still waiting for" + # message is logged exactly once per wait, even if the backend wakes + # during the wait. + node.safe_psql( + "SELECT pg_log_backend_memory_contexts(pid)\n" + "\tFROM pg_locks WHERE locktype = 'relation' AND\n" + "\trelation = 'test_stat_tab'::regclass AND NOT granted;" + ) + node.wait_for_log(r"logging memory contexts", log_offset) + + # deadlock_timeout fired, now commit in s1 and s2 + s1.query_safe("COMMIT") + s2.query_safe("COMMIT") + + # check that pg_stat_lock has been updated + _wait_for_pg_stat_lock(node, "relation") + assert True, "Lock stats ok for relation" + + # Check that log_lock_waits message is emitted when the lock is acquired + # after waiting. + node.wait_for_log(r"acquired AccessExclusiveLock on relation", log_offset) + + # Check that the "still waiting for" message is logged exactly once per + # wait, even if the backend wakes during the wait. 
+ log_contents = pypg.slurp_file(node.log, log_offset) + still_waiting = _find_all("still waiting for", log_contents) + assert len(still_waiting) == 1, ( + "still waiting logged exactly once despite wakeups from " + "pg_log_backend_memory_contexts()" + ) + + # close sessions + s1.quit() + s2.quit() + + # ###### transaction lock + + s1, s2 = _setup_sessions(node) + + log_offset = node.current_log_position() + + s1.query_safe( + """ +SELECT pg_stat_reset_shared('lock'); +INSERT INTO test_stat_tab(key, value) VALUES('k1', 1), ('k2', 1), ('k3', 1); +BEGIN; +UPDATE test_stat_tab SET value = value + 1 WHERE key = 'k1'; +""" + ) + + # s2 setup + s2.query_safe( + """ +SET log_lock_waits = on; +BEGIN; +SELECT pg_stat_force_next_flush(); +""" + ) + # s2 blocks here on UPDATE + s2.query_until( + r"lock_s2", + "\n\\echo lock_s2\n" + "UPDATE test_stat_tab SET value = value + 1 WHERE key = 'k1';\n", + ) + + _wait_and_detach(node, "deadlock-timeout-fired") + + # Check that log_lock_waits message is emitted during a lock wait. + node.wait_for_log(r"still waiting for ShareLock on transaction", log_offset) + + # deadlock_timeout fired, now commit in s1 and s2 + s1.query_safe("COMMIT") + s2.query_safe("COMMIT") + + # check that pg_stat_lock has been updated + _wait_for_pg_stat_lock(node, "transactionid") + assert True, "Lock stats ok for transactionid" + + # Check that log_lock_waits message is emitted when the lock is acquired + # after waiting. + node.wait_for_log(r"acquired ShareLock on transaction", log_offset) + + # Close sessions + s1.quit() + s2.quit() + + # ###### advisory lock + + s1, s2 = _setup_sessions(node) + + log_offset = node.current_log_position() + + s1.query_safe( + """ +SELECT pg_stat_reset_shared('lock'); +SELECT pg_advisory_lock(1); +""" + ) + + # s2 setup + s2.query_safe( + """ +SET log_lock_waits = on; +BEGIN; +SELECT pg_stat_force_next_flush(); +""" + ) + # s2 blocks on the advisory lock. 
+ s2.query_until( + r"lock_s2", + "\n\\echo lock_s2\nSELECT pg_advisory_lock(1);\n", + ) + + _wait_and_detach(node, "deadlock-timeout-fired") + + # Check that log_lock_waits message is emitted during a lock wait. + node.wait_for_log(r"still waiting for ExclusiveLock on advisory lock", log_offset) + + # deadlock_timeout fired, now unlock and commit s2 + s1.query_safe("SELECT pg_advisory_unlock(1)") + s2.query_safe( + """ +SELECT pg_advisory_unlock(1); +COMMIT; +""" + ) + + # check that pg_stat_lock has been updated + _wait_for_pg_stat_lock(node, "advisory") + assert True, "Lock stats ok for advisory" + + # Check that log_lock_waits message is emitted when the lock is acquired + # after waiting. + node.wait_for_log(r"acquired ExclusiveLock on advisory lock", log_offset) + + # Close sessions + s1.quit() + s2.quit() + + # ###### Ensure log_lock_waits has no impact + + s1, s2 = _setup_sessions(node) + + log_offset = node.current_log_position() + + s1.query_safe( + """ +SELECT pg_stat_reset_shared('lock'); +BEGIN; +LOCK TABLE test_stat_tab; +""" + ) + + # s2 setup + s2.query_safe( + """ +SET log_lock_waits = off; +BEGIN; +SELECT pg_stat_force_next_flush(); +""" + ) + # s2 blocks on LOCK. 
+ s2.query_until( + r"lock_s2", + "\n\\echo lock_s2\nLOCK TABLE test_stat_tab;\n", + ) + + _wait_and_detach(node, "deadlock-timeout-fired") + + # deadlock_timeout fired, now commit in s1 and s2 + s1.query_safe("COMMIT") + s2.query_safe("COMMIT") + + # check that pg_stat_lock has been updated + _wait_for_pg_stat_lock(node, "relation") + assert True, "log_lock_waits has no impact on Lock stats" + + # Check that no log_lock_waits messages are emitted + assert not node.log_matches( + "still waiting for AccessExclusiveLock on relation", log_offset + ), "check that no log_lock_waits message is emitted during a lock wait" + assert not node.log_matches( + "acquired AccessExclusiveLock on relation", log_offset + ), ( + "check that no log_lock_waits message is emitted when the lock is " + "acquired after waiting" + ) + + # close sessions + s1.quit() + s2.quit() + + # cleanup + node.safe_psql("DROP TABLE test_stat_tab;") + + +def _find_all(needle, haystack): + """Return all non-overlapping occurrences of needle (a regex) in haystack.""" + return re.findall(needle, haystack) diff --git a/src/test/modules/test_misc/pyt/test_012_ddlutils.py b/src/test/modules/test_misc/pyt/test_012_ddlutils.py new file mode 100644 index 0000000000000..2800238d5ccc1 --- /dev/null +++ b/src/test/modules/test_misc/pyt/test_012_ddlutils.py @@ -0,0 +1,355 @@ +# Copyright (c) 2026, PostgreSQL Global Development Group + +# pylint: disable=line-too-long,too-many-statements +"""Port of src/test/modules/test_misc/t/012_ddlutils.pl. + +Tests for pg_get_database_ddl(), pg_get_tablespace_ddl(), and +pg_get_role_ddl(). These are TAP tests rather than plain regression tests +because they create databases and tablespaces, which are heavyweight +operations that should run only once rather than being repeated with every +invocation of the core regression suite. 
+""" + +import re + + +def _ddl_filter(text): + """Strip locale/collation details from DDL output so that results are + stable across platforms (mirrors the Perl ddl_filter sub).""" + text = re.sub( + r"\s*\bLOCALE_PROVIDER\b\s*=\s*(?:'[^']*'|\"[^\"]*\"|\S+)", + "", + text, + flags=re.IGNORECASE, + ) + text = re.sub( + r"\s*LC_COLLATE\s*=\s*(['\"])[^'\"]*\1", "", text, flags=re.IGNORECASE + ) + text = re.sub(r"\s*LC_CTYPE\s*=\s*(['\"])[^'\"]*\1", "", text, flags=re.IGNORECASE) + text = re.sub( + r"\s*\S*LOCALE\S*\s*=?\s*(['\"])[^'\"]*\1", "", text, flags=re.IGNORECASE + ) + text = re.sub( + r"\s*\S*COLLATION\S*\s*=?\s*(['\"])[^'\"]*\1", "", text, flags=re.IGNORECASE + ) + return text + + +def test_012_ddlutils(create_pg): + """pg_get_role_ddl/pg_get_database_ddl/pg_get_tablespace_ddl behavior.""" + node = create_pg("main", start=False) + # Force UTC so that timestamptz values (e.g. VALID UNTIL) render the same + # way regardless of the host's local timezone. + node.append_conf("timezone = 'UTC'\n") + node.start() + + def like(text, pattern, _msg): + assert re.search(pattern, text), "{}\noutput:\n{}".format(_msg, text) + + def unlike(text, pattern, _msg): + assert not re.search(pattern, text), "{}\noutput:\n{}".format(_msg, text) + + ###################################################################### + # pg_get_role_ddl tests + ###################################################################### + + # Basic role + node.safe_psql("CREATE ROLE regress_role_ddl_test1") + result = node.safe_psql("SELECT * FROM pg_get_role_ddl('regress_role_ddl_test1')") + like( + result, + r"CREATE ROLE regress_role_ddl_test1 .* NOLOGIN", + "basic role DDL", + ) + + # Role with multiple privileges + node.safe_psql( + """ +\tCREATE ROLE regress_role_ddl_test2 +\t LOGIN SUPERUSER CREATEDB CREATEROLE +\t CONNECTION LIMIT 5 +\t VALID UNTIL '2030-12-31 23:59:59+00'""" + ) + result = node.safe_psql("SELECT * FROM pg_get_role_ddl('regress_role_ddl_test2')") + like(result, r"SUPERUSER", 
"role with SUPERUSER") + like(result, r"CREATEDB", "role with CREATEDB") + like(result, r"CONNECTION LIMIT 5", "role with CONNECTION LIMIT") + like(result, r"VALID UNTIL '2030-12-31", "role with VALID UNTIL") + + # Role with configuration parameters + node.safe_psql( + """ +\tALTER ROLE regress_role_ddl_test1 SET work_mem TO '256MB'; +\tALTER ROLE regress_role_ddl_test1 SET search_path TO myschema, public""" + ) + result = node.safe_psql("SELECT * FROM pg_get_role_ddl('regress_role_ddl_test1')") + like(result, r"SET work_mem TO '256MB'", "role with work_mem setting") + like(result, r"SET search_path TO", "role with search_path setting") + + # Role with database-specific configuration (needs a real database) + node.safe_psql( + """ +\tCREATE DATABASE regression_ddlutils_test +\t TEMPLATE template0 ENCODING 'UTF8' LC_COLLATE 'C' LC_CTYPE 'C'; +\tALTER ROLE regress_role_ddl_test2 +\t IN DATABASE regression_ddlutils_test SET work_mem TO '128MB'""" + ) + result = node.safe_psql("SELECT * FROM pg_get_role_ddl('regress_role_ddl_test2')") + like( + result, + r"IN DATABASE regression_ddlutils_test SET work_mem TO '128MB'", + "role with database-specific setting", + ) + + # Role with special characters (requires quoting) + node.safe_psql('CREATE ROLE "regress_role-with-dash"') + result = node.safe_psql("SELECT * FROM pg_get_role_ddl('regress_role-with-dash')") + like(result, r'"regress_role-with-dash"', "role name requiring quoting") + + # Pretty-printed output + result = node.safe_psql( + "SELECT * FROM pg_get_role_ddl('regress_role_ddl_test2', 'pretty', 'true')" + ) + like(result, r"\n\s+SUPERUSER", "role pretty-print indents attributes") + + # Role with memberships + node.safe_psql( + """ +\tCREATE ROLE regress_role_ddl_grantor CREATEROLE; +\tCREATE ROLE regress_role_ddl_group1; +\tCREATE ROLE regress_role_ddl_group2; +\tCREATE ROLE regress_role_ddl_member; +\tGRANT regress_role_ddl_group1 TO regress_role_ddl_grantor WITH ADMIN TRUE; +\tGRANT regress_role_ddl_group2 TO 
regress_role_ddl_grantor WITH ADMIN TRUE; +\tSET ROLE regress_role_ddl_grantor; +\tGRANT regress_role_ddl_group1 TO regress_role_ddl_member +\t WITH INHERIT TRUE, SET FALSE; +\tGRANT regress_role_ddl_group2 TO regress_role_ddl_member +\t WITH ADMIN TRUE; +\tRESET ROLE""" + ) + result = node.safe_psql("SELECT * FROM pg_get_role_ddl('regress_role_ddl_member')") + like( + result, + r"GRANT regress_role_ddl_group1 TO regress_role_ddl_member", + "role with memberships includes GRANT", + ) + like(result, r"SET FALSE", "membership includes SET FALSE") + like(result, r"ADMIN TRUE", "membership includes ADMIN TRUE") + + # Memberships suppressed + result = node.safe_psql( + "SELECT * FROM pg_get_role_ddl('regress_role_ddl_member', 'memberships', 'false')" + ) + unlike(result, r"GRANT", "memberships suppressed") + + # Non-existent role (should error) + res = node.psql_capture("SELECT * FROM pg_get_role_ddl(9999999::oid)") + assert res.rc != 0, "non-existent role errors" + like(res.stderr, r"does not exist", "non-existent role error message") + + # NULL input (should return no rows) + result = node.safe_psql("SELECT count(*) FROM pg_get_role_ddl(NULL)") + assert result == "0", "NULL role returns no rows" + + # Permission check: revoke SELECT on pg_authid + node.safe_psql( + """ +\tCREATE ROLE regress_role_ddl_noaccess; +\tREVOKE SELECT ON pg_authid FROM PUBLIC""" + ) + res = node.psql_capture( + "SET ROLE regress_role_ddl_noaccess;\n" + "\t SELECT * FROM pg_get_role_ddl('regress_role_ddl_test1')" + ) + assert res.rc != 0, "role DDL denied without pg_authid access" + node.safe_psql( + """ +\tGRANT SELECT ON pg_authid TO PUBLIC""" + ) + + ###################################################################### + # pg_get_database_ddl tests + ###################################################################### + + # Set up: the test database was already created above for role tests. 
+ node.safe_psql( + """ +\tALTER DATABASE regression_ddlutils_test OWNER TO regress_role_ddl_test2; +\tALTER DATABASE regression_ddlutils_test CONNECTION LIMIT 123; +\tALTER DATABASE regression_ddlutils_test SET random_page_cost = 2.0; +\tALTER ROLE regress_role_ddl_test2 +\t IN DATABASE regression_ddlutils_test SET random_page_cost = 1.1""" + ) + + # Non-existent database + res = node.psql_capture( + "SELECT * FROM pg_get_database_ddl('regression_no_such_db')" + ) + assert res.rc != 0, "non-existent database errors" + + # NULL input + result = node.safe_psql("SELECT count(*) FROM pg_get_database_ddl(NULL)") + assert result == "0", "NULL database returns no rows" + + # Invalid option + res = node.psql_capture( + "SELECT * FROM pg_get_database_ddl('regression_ddlutils_test', 'owner', 'invalid')" + ) + assert res.rc != 0, "invalid boolean option errors" + like(res.stderr, r"invalid value", "invalid option error message") + + # Duplicate option + res = node.psql_capture( + "SELECT * FROM pg_get_database_ddl('regression_ddlutils_test',\n" + "\t 'owner', 'false', 'owner', 'true')" + ) + assert res.rc != 0, "duplicate option errors" + + # Basic output (without locale details) + result = _ddl_filter( + node.safe_psql( + "SELECT pg_get_database_ddl\n" + "\t FROM pg_get_database_ddl('regression_ddlutils_test')" + ) + ) + like( + result, + r"CREATE DATABASE regression_ddlutils_test", + "database DDL includes CREATE", + ) + like(result, r"TEMPLATE = template0", "database DDL includes TEMPLATE") + like(result, r"ENCODING = 'UTF8'", "database DDL includes ENCODING") + like( + result, + r"OWNER TO regress_role_ddl_test2", + "database DDL includes OWNER", + ) + like(result, r"CONNECTION LIMIT = 123", "database DDL includes CONNLIMIT") + like( + result, + r"SET random_page_cost TO '2.0'", + "database DDL includes GUC setting", + ) + + # Pretty-printed output + result = _ddl_filter( + node.safe_psql( + "SELECT pg_get_database_ddl\n" + "\t FROM 
pg_get_database_ddl('regression_ddlutils_test',\n" + "\t 'pretty', 'true', 'tablespace', 'false')" + ) + ) + like(result, r"\n\s+WITH TEMPLATE", "database DDL pretty-prints WITH") + + # Permission check + node.safe_psql( + """ +\tREVOKE CONNECT ON DATABASE regression_ddlutils_test FROM PUBLIC""" + ) + res = node.psql_capture( + "SET ROLE regress_role_ddl_noaccess;\n" + "\t SELECT * FROM pg_get_database_ddl('regression_ddlutils_test')" + ) + assert res.rc != 0, "database DDL denied without CONNECT" + node.safe_psql( + """ +\tGRANT CONNECT ON DATABASE regression_ddlutils_test TO PUBLIC""" + ) + + ###################################################################### + # pg_get_tablespace_ddl tests + ###################################################################### + + # Non-existent tablespace by name + res = node.psql_capture( + "SELECT * FROM pg_get_tablespace_ddl('regress_nonexistent_tblsp')" + ) + assert res.rc != 0, "non-existent tablespace errors" + + # Non-existent tablespace by OID + res = node.psql_capture("SELECT * FROM pg_get_tablespace_ddl(0::oid)") + assert res.rc != 0, "non-existent tablespace OID errors" + + # NULL input (name and OID variants) + result = node.safe_psql("SELECT count(*) FROM pg_get_tablespace_ddl(NULL::name)") + assert result == "0", "NULL tablespace name returns no rows" + result = node.safe_psql("SELECT count(*) FROM pg_get_tablespace_ddl(NULL::oid)") + assert result == "0", "NULL tablespace OID returns no rows" + + # Tablespace name requiring quoting + node.safe_psql( + """ +\tSET allow_in_place_tablespaces = true; +\tCREATE TABLESPACE "regress_ tblsp" OWNER regress_role_ddl_test1 +\t LOCATION ''""" + ) + result = node.safe_psql("SELECT * FROM pg_get_tablespace_ddl('regress_ tblsp')") + like(result, r'"regress_ tblsp"', "tablespace name is quoted") + + # Rename and add options; reuse this tablespace for the remaining tests + node.safe_psql( + """ +\tALTER TABLESPACE "regress_ tblsp" RENAME TO regress_allopt_tblsp; +\tALTER 
TABLESPACE regress_allopt_tblsp +\t SET (seq_page_cost = '1.5', random_page_cost = '1.1234567890', +\t effective_io_concurrency = '17', maintenance_io_concurrency = '18')""" + ) + + # Tablespace with multiple options + result = node.safe_psql( + "SELECT * FROM pg_get_tablespace_ddl('regress_allopt_tblsp')" + ) + like( + result, + r"CREATE TABLESPACE regress_allopt_tblsp", + "tablespace DDL includes CREATE", + ) + like( + result, + r"OWNER regress_role_ddl_test1", + "tablespace DDL includes OWNER", + ) + like(result, r"seq_page_cost='1.5'", "tablespace DDL includes options") + + # Pretty-printed output + result = node.safe_psql( + "SELECT * FROM pg_get_tablespace_ddl('regress_allopt_tblsp',\n" + "\t 'pretty', 'true')" + ) + like(result, r"\n\s+OWNER", "tablespace DDL pretty-prints OWNER") + + # Owner suppressed + result = node.safe_psql( + "SELECT * FROM pg_get_tablespace_ddl('regress_allopt_tblsp',\n" + "\t 'owner', 'false')" + ) + unlike(result, r"OWNER", "tablespace DDL owner suppressed") + + # Lookup by OID + result = node.safe_psql( + """ +\tSELECT pg_get_tablespace_ddl +\tFROM pg_get_tablespace_ddl( +\t (SELECT oid FROM pg_tablespace +\t WHERE spcname = 'regress_allopt_tblsp'))""" + ) + like( + result, + r"CREATE TABLESPACE regress_allopt_tblsp", + "tablespace DDL by OID", + ) + + # Permission check + node.safe_psql("REVOKE SELECT ON pg_tablespace FROM PUBLIC") + res = node.psql_capture( + "SET ROLE regress_role_ddl_noaccess;\n" + "\t SELECT * FROM pg_get_tablespace_ddl('regress_allopt_tblsp')" + ) + assert res.rc != 0, "tablespace DDL denied without pg_tablespace access" + node.safe_psql( + """ +\tGRANT SELECT ON pg_tablespace TO PUBLIC""" + ) + + node.stop() diff --git a/src/test/modules/test_misc/pyt/test_013_temp_obj_multisession.py b/src/test/modules/test_misc/pyt/test_013_temp_obj_multisession.py new file mode 100644 index 0000000000000..f9ff9b06ba732 --- /dev/null +++ b/src/test/modules/test_misc/pyt/test_013_temp_obj_multisession.py @@ -0,0 +1,307 @@ 
+# Copyright (c) 2026, PostgreSQL Global Development Group + +# pylint: disable=line-too-long,too-many-statements +"""Port of src/test/modules/test_misc/t/013_temp_obj_multisession.pl. + +Tests that one session cannot read or modify data in another session's +temporary table. Each session keeps its temp data in its own local buffer +pool, and a different backend has no visibility into those buffers, so any +command that needs to look at the data must be rejected. + +DROP TABLE is intentionally allowed: it does not touch the table's contents, +and autovacuum relies on this to clean up orphaned temp relations left behind +by a crashed backend. + +A regression caught here typically means a new buffer-access entry point +bypasses the RELATION_IS_OTHER_TEMP() check. +""" + +import re + + +def test_013_temp_obj_multisession(create_pg): + """A session cannot read/modify another session's temp table data.""" + node = create_pg("temp_lock", start=False) + node.start() + + def like(text, pattern, _msg): + assert re.search(pattern, text), "{}\nstderr:\n{}".format(_msg, text) + + # Owner session. Created via background_psql so it stays alive while + # the second session probes its temp objects. + psql1 = node.background_psql("postgres") + + # Initially create the table without an index, so read paths go straight + # through the read-stream / buffer-manager entry points without being + # masked by an index scan that would hit ReadBuffer_common from nbtree. + psql1.query_safe("CREATE TEMP TABLE foo AS SELECT 42 AS val;") + + # Resolve the owner's temp schema so the probing session can refer to + # the table by a fully-qualified name. 
+ tempschema = node.safe_psql( + """ + SELECT n.nspname + FROM pg_class c + JOIN pg_namespace n ON n.oid = c.relnamespace + WHERE relname = 'foo' AND relpersistence = 't'; + """ + ) + assert re.match(r"^pg_temp_\d+$", tempschema), "got temp schema: {}".format( + tempschema + ) + + # DML and SELECT have to read the table's data and therefore go through + # the buffer manager. With no index on the table, the planner cannot + # use index access, so SELECT/UPDATE/DELETE/MERGE/COPY all run through + # the read-stream path and are caught by read_stream_begin_impl(). + + result = node.psql_capture( + "SELECT val FROM {}.foo;".format(tempschema), on_error_stop=False + ) + like( + result.stderr, + r"cannot access temporary tables of other sessions", + "SELECT (seqscan via read_stream)", + ) + + # INSERT goes through hio.c which calls ReadBufferExtended() to find a + # page with free space; that hits the existing check before any data + # is written. + result = node.psql_capture( + "INSERT INTO {}.foo VALUES (73);".format(tempschema), on_error_stop=False + ) + like( + result.stderr, + r"cannot access temporary tables of other sessions", + "INSERT (caught via hio.c)", + ) + + result = node.psql_capture( + "UPDATE {}.foo SET val = NULL;".format(tempschema), on_error_stop=False + ) + like( + result.stderr, + r"cannot access temporary tables of other sessions", + "UPDATE", + ) + + result = node.psql_capture( + "DELETE FROM {}.foo;".format(tempschema), on_error_stop=False + ) + like( + result.stderr, + r"cannot access temporary tables of other sessions", + "DELETE", + ) + + result = node.psql_capture( + "MERGE INTO {schema}.foo USING (VALUES (42)) AS s(val) " + "ON foo.val = s.val WHEN MATCHED THEN DELETE;".format(schema=tempschema), + on_error_stop=False, + ) + like( + result.stderr, + r"cannot access temporary tables of other sessions", + "MERGE", + ) + + result = node.psql_capture( + "COPY {}.foo TO STDOUT;".format(tempschema), on_error_stop=False + ) + like( + result.stderr, 
+ r"cannot access temporary tables of other sessions", + "COPY", + ) + + # DDL and maintenance commands have their own command-specific checks + # (older than the buffer-manager check above), so they fail with + # command-specific error messages. Verifying them here documents the + # expected behaviour and guards against accidental removal of those + # checks. + + result = node.psql_capture( + "TRUNCATE TABLE {}.foo;".format(tempschema), on_error_stop=False + ) + like( + result.stderr, + r"cannot truncate temporary tables of other sessions", + "TRUNCATE", + ) + + result = node.psql_capture( + "ALTER TABLE {}.foo ALTER COLUMN val TYPE bigint;".format(tempschema), + on_error_stop=False, + ) + like( + result.stderr, + r"cannot alter temporary tables of other sessions", + "ALTER TABLE", + ) + + # VACUUM silently skips other sessions' temp tables (vacuum_rel() returns + # without warning to avoid noise during database-wide VACUUM). Verify + # that no error is reported, and that no buffer-access path is hit. + result = node.psql_capture("VACUUM {}.foo;".format(tempschema), on_error_stop=False) + assert result.stderr == "", "VACUUM is silently skipped" + + result = node.psql_capture( + "CLUSTER {}.foo;".format(tempschema), on_error_stop=False + ) + like( + result.stderr, + r"cannot execute CLUSTER on temporary tables of other sessions", + "CLUSTER", + ) + + # Now create an index to exercise the index-scan path. nbtree calls + # ReadBuffer (which is ReadBufferExtended -> ReadBuffer_common), so + # this exercises a different chain of buffer-manager entry points. 
+ psql1.query_safe("CREATE INDEX ON foo(val);") + + result = node.psql_capture( + "SET enable_seqscan = off; SELECT val FROM {}.foo WHERE val = 42;".format( + tempschema + ), + on_error_stop=False, + ) + like( + result.stderr, + r"cannot access temporary tables of other sessions", + "index scan (ReadBuffer_common via nbtree)", + ) + + # ALTER INDEX goes through the same CheckAlterTableIsSafe() path as + # ALTER TABLE, so it produces the same error. + result = node.psql_capture( + "ALTER INDEX {}.foo_val_idx SET (fillfactor = 50);".format(tempschema), + on_error_stop=False, + ) + like( + result.stderr, + r"cannot alter temporary tables of other sessions", + "ALTER INDEX", + ) + + # A function created by the owner in its own pg_temp using its own + # row type can be observed via the catalog by a separate session. + # ALTER FUNCTION and DROP FUNCTION on it must work as catalog + # operations -- they don't read the underlying table -- which + # documents the boundary between catalog and data access for temp + # objects. + psql1.query_safe( + "CREATE FUNCTION pg_temp.foo_id(r foo) RETURNS int LANGUAGE SQL " + "AS 'SELECT r.val';" + ) + + result = node.psql_capture( + "ALTER FUNCTION {schema}.foo_id({schema}.foo) " + "SET search_path = pg_catalog;".format(schema=tempschema), + on_error_stop=False, + ) + assert ( + result.stderr == "" + ), "ALTER FUNCTION on function over other session's row type" + + result = node.psql_capture( + "DROP FUNCTION {schema}.foo_id({schema}.foo);".format(schema=tempschema), + on_error_stop=False, + ) + assert ( + result.stderr == "" + ), "DROP FUNCTION on function over other session's row type" + + # DROP TABLE on another session's temp table is intentionally permitted. + # DROP doesn't touch the table's contents, and autovacuum relies on this + # to remove temp relations orphaned by a crashed backend. Verify that + # the bare DROP succeeds without error. 
+ result = node.psql_capture( + "DROP TABLE {}.foo;".format(tempschema), on_error_stop=False + ) + assert result.stderr == "", "DROP TABLE is allowed" + + # Cross-session CREATE FUNCTION scenario. The owner creates a fresh + # temp table foo2 in its pg_temp namespace, and a separate session + # then creates a function whose argument type is that row type. + # PostgreSQL allows this and emits a NOTICE: the function is moved + # into the creator's pg_temp namespace with an auto-dependency on + # the borrowed type, so it disappears together with the session that + # created it. + psql1.query_safe("CREATE TEMP TABLE foo2 AS SELECT 42 AS val;") + + result = node.psql_capture( + "CREATE FUNCTION public.cross_session_func(r {}.foo2) " + "RETURNS int LANGUAGE SQL AS 'SELECT 1';".format(tempschema), + on_error_stop=False, + ) + like( + result.stderr, + r'function "cross_session_func" will be effectively temporary', + "CREATE FUNCTION using other session's row type is effectively temporary", + ) + + # A bare DROP TABLE on foo2 now fails because cross_session_func + # depends on its row type. This is normal SQL dependency behaviour + # and documents that DROP itself is not blocked by buffer-manager + # checks -- we get a catalog-level error instead. + result = node.psql_capture( + "DROP TABLE {}.foo2;".format(tempschema), on_error_stop=False + ) + like( + result.stderr, + r"cannot drop table .*\.foo2 because other objects depend on it", + "DROP TABLE blocked by cross-session dependency", + ) + + foo2_oid = node.safe_psql("SELECT oid FROM pg_class WHERE relname='foo2';") + + # Cross-session LOCK TABLE scenario. Ensure that LockRelationOid is + # working properly for other temp tables since this mechanism is also + # used by autovacuum during orphaned tables cleanup. 
+ psql2 = node.background_psql("postgres") + psql2.query_safe( + """ +\tBEGIN; +\tLOCK TABLE {}.foo2 IN ACCESS SHARE MODE; +""".format( + tempschema + ) + ) + + # When the owner session ends, its temp objects are dropped via the + # normal session-exit cleanup, which cascades through DEPENDENCY_NORMAL + # and also removes the cross-session function that depended on the temp + # row type. This is the same mechanism autovacuum relies on to clean up + # temp relations left behind by a crashed backend. + # Access share lock on the foo2 will block session-exit cleanup, because + # an owner will try to acquire deletion lock all its temp objects via + # findDependentObjects. + log_offset = node.current_log_position() + psql1.quit() + + # Check whether session-exit cleanup is blocked. + node.wait_for_log( + r"waiting for AccessExclusiveLock on relation {}".format(foo2_oid), + log_offset, + ) + + # Release lock on foo2 and allow session-exit cleanup to finish. + psql2.query_safe("COMMIT;") + psql2.quit() + + # After releasing the lock, the owner can finally acquire + # AccessExclusiveLock on foo2 and finish session-exit cleanup. Verify + # directly that both foo2 (the locked temp table) and cross_session_func + # (which depended on its row type) have been dropped. Both being gone + # confirms the owner's cleanup got past the blocked findDependentObjects() + # call and completed normally. 
+ assert node.poll_query_until( + "SELECT NOT EXISTS (SELECT 1 FROM pg_class WHERE oid = {})".format(foo2_oid) + ), "foo2 was not cleaned up after owner session exit" + + assert ( + node.safe_psql( + "SELECT count(*) FROM pg_proc WHERE proname = 'cross_session_func'" + ) + == "0" + ), "cross_session_func cleaned up by cascade from foo2" diff --git a/src/test/modules/test_pg_dump/meson.build b/src/test/modules/test_pg_dump/meson.build index 1d2f57360923b..dcb44e4d6e395 100644 --- a/src/test/modules/test_pg_dump/meson.build +++ b/src/test/modules/test_pg_dump/meson.build @@ -19,4 +19,9 @@ tests += { 't/001_base.pl', ], }, + 'pytest': { + 'tests': [ + 'pyt/test_001_base.py', + ], + }, } diff --git a/src/test/modules/test_pg_dump/pyt/test_001_base.py b/src/test/modules/test_pg_dump/pyt/test_001_base.py new file mode 100644 index 0000000000000..84aee7c554de3 --- /dev/null +++ b/src/test/modules/test_pg_dump/pyt/test_001_base.py @@ -0,0 +1,1359 @@ +# Copyright (c) 2021-2026, PostgreSQL Global Development Group + +# pylint: disable=line-too-long +"""Port of src/test/modules/test_pg_dump/t/001_base.pl. + +Data-driven pg_dump/pg_restore matrix for the test_pg_dump extension. A set of +named dump runs (full dumps, section/schema/data-only dumps, format+restore +round-trips, extension include/exclude variants, pg_dumpall --globals-only) is +executed against a single seeded server. Each named test owns a regexp plus +'like'/'unlike' membership keyed by run (or test_key); for every run the test's +regexp must match the dump output iff the run is a 'like' and not an 'unlike'. + +Faithful transcription of the Perl original: %pgdump_runs, %full_runs, %tests +and the driver are reproduced below. Regexps preserve the Perl /xm (and /xms) +semantics via re.VERBOSE | re.MULTILINE (| re.DOTALL); the Perl \\Q...\\E +quotemeta blocks are expanded with re.escape (which also escapes spaces, so +they survive VERBOSE mode). 
+""" + +import os +import re +import tempfile +from typing import Dict, List, Optional, Pattern, Tuple + +import pypg + +XM = re.VERBOSE | re.MULTILINE +XMS = re.VERBOSE | re.MULTILINE | re.DOTALL + +# Each regexp is built from a sequence of segments. A ("lit", text) segment is +# a Perl \Q...\E quotemeta literal (re.escape, which also escapes spaces so it +# survives VERBOSE mode); an ("rx", raw) segment is verbatim regex syntax. +_Segment = Tuple[str, str] + + +def _qr(parts: List[_Segment], flags: int) -> Pattern[str]: + """Compile a Perl-style qr/.../ from literal/regex segments.""" + pieces = [] + for kind, val in parts: + pieces.append(re.escape(val) if kind == "lit" else val) + return re.compile("".join(pieces), flags) + + +# --------------------------------------------------------------------------- +# Definition of the pg_dump runs to make. Mirrors %pgdump_runs. +# +# Each entry maps a run name to a dict with: 'dump_cmd' (argv, with $tempdir +# placeholders resolved at runtime), optional 'restore_cmd', optional +# 'test_key' (reuse another run's like/unlike set), optional 'compile_option' +# (skip the run when the build dependency is missing, e.g. 'gzip'). 
+# --------------------------------------------------------------------------- + + +def _pgdump_runs(tempdir: str) -> Dict[str, dict]: + """Build the run matrix with $tempdir paths resolved (mirrors %pgdump_runs).""" + return { + "binary_upgrade": { + "dump_cmd": [ + "pg_dump", + "--no-sync", + "--file", + f"{tempdir}/binary_upgrade.sql", + "--schema-only", + "--sequence-data", + "--binary-upgrade", + "--dbname", + "postgres", + ], + }, + "clean": { + "dump_cmd": [ + "pg_dump", + "--no-sync", + "--file", + f"{tempdir}/clean.sql", + "--clean", + "--dbname", + "postgres", + ], + }, + "clean_if_exists": { + "dump_cmd": [ + "pg_dump", + "--no-sync", + "--file", + f"{tempdir}/clean_if_exists.sql", + "--clean", + "--if-exists", + "--encoding", + "UTF8", # no-op, just tests that it is accepted + "postgres", + ], + }, + "createdb": { + "dump_cmd": [ + "pg_dump", + "--no-sync", + "--file", + f"{tempdir}/createdb.sql", + "--create", + "--no-reconnect", # no-op, just for testing + "postgres", + ], + }, + "data_only": { + "dump_cmd": [ + "pg_dump", + "--no-sync", + "--file", + f"{tempdir}/data_only.sql", + "--data-only", + "--verbose", # no-op, just make sure it works + "postgres", + ], + }, + "defaults": { + "dump_cmd": [ + "pg_dump", + "--file", + f"{tempdir}/defaults.sql", + "postgres", + ], + }, + "defaults_custom_format": { + "test_key": "defaults", + "compile_option": "gzip", + "dump_cmd": [ + "pg_dump", + "--no-sync", + "--format", + "custom", + "--compress", + "6", + "--file", + f"{tempdir}/defaults_custom_format.dump", + "postgres", + ], + "restore_cmd": [ + "pg_restore", + "--file", + f"{tempdir}/defaults_custom_format.sql", + f"{tempdir}/defaults_custom_format.dump", + ], + }, + "defaults_dir_format": { + "test_key": "defaults", + "dump_cmd": [ + "pg_dump", + "--no-sync", + "--format", + "directory", + "--file", + f"{tempdir}/defaults_dir_format", + "postgres", + ], + "restore_cmd": [ + "pg_restore", + "--file", + f"{tempdir}/defaults_dir_format.sql", + 
f"{tempdir}/defaults_dir_format", + ], + }, + "defaults_parallel": { + "test_key": "defaults", + "dump_cmd": [ + "pg_dump", + "--no-sync", + "--format", + "directory", + "--jobs", + "2", + "--file", + f"{tempdir}/defaults_parallel", + "postgres", + ], + "restore_cmd": [ + "pg_restore", + "--file", + f"{tempdir}/defaults_parallel.sql", + f"{tempdir}/defaults_parallel", + ], + }, + "defaults_tar_format": { + "test_key": "defaults", + "dump_cmd": [ + "pg_dump", + "--no-sync", + "--format", + "tar", + "--file", + f"{tempdir}/defaults_tar_format.tar", + "postgres", + ], + "restore_cmd": [ + "pg_restore", + "--file", + f"{tempdir}/defaults_tar_format.sql", + f"{tempdir}/defaults_tar_format.tar", + ], + }, + "exclude_table": { + "dump_cmd": [ + "pg_dump", + "--exclude-table", + "regress_table_dumpable", + "--file", + f"{tempdir}/exclude_table.sql", + "postgres", + ], + }, + "extension_schema": { + "dump_cmd": [ + "pg_dump", + "--schema", + "public", + "--file", + f"{tempdir}/extension_schema.sql", + "postgres", + ], + }, + "pg_dumpall_globals": { + "dump_cmd": [ + "pg_dumpall", + "--no-sync", + "--file", + f"{tempdir}/pg_dumpall_globals.sql", + "--globals-only", + ], + }, + "no_privs": { + "dump_cmd": [ + "pg_dump", + "--no-sync", + "--file", + f"{tempdir}/no_privs.sql", + "--no-privileges", + "postgres", + ], + }, + "no_owner": { + "dump_cmd": [ + "pg_dump", + "--no-sync", + "--file", + f"{tempdir}/no_owner.sql", + "--no-owner", + "postgres", + ], + }, + # regress_dump_login_role shouldn't need SELECT rights on internal + # (undumped) extension tables + "privileged_internals": { + "dump_cmd": [ + "pg_dump", + "--no-sync", + "--file", + f"{tempdir}/privileged_internals.sql", + # these two tables are irrelevant to the test case + "--exclude-table", + "regress_pg_dump_schema.external_tab", + "--exclude-table", + "regress_pg_dump_schema.extdependtab", + "--username", + "regress_dump_login_role", + "postgres", + ], + }, + "schema_only": { + "dump_cmd": [ + "pg_dump", + 
"--no-sync", + "--file", + f"{tempdir}/schema_only.sql", + "--schema-only", + "postgres", + ], + }, + "section_pre_data": { + "dump_cmd": [ + "pg_dump", + "--no-sync", + "--file", + f"{tempdir}/section_pre_data.sql", + "--section", + "pre-data", + "postgres", + ], + }, + "section_data": { + "dump_cmd": [ + "pg_dump", + "--no-sync", + "--file", + f"{tempdir}/section_data.sql", + "--section", + "data", + "postgres", + ], + }, + "section_post_data": { + "dump_cmd": [ + "pg_dump", + "--no-sync", + "--file", + f"{tempdir}/section_post_data.sql", + "--section", + "post-data", + "postgres", + ], + }, + "with_extension": { + "dump_cmd": [ + "pg_dump", + "--no-sync", + "--file", + f"{tempdir}/with_extension.sql", + "--extension", + "test_pg_dump", + "postgres", + ], + }, + "exclude_extension": { + "dump_cmd": [ + "pg_dump", + "--no-sync", + "--file", + f"{tempdir}/exclude_extension.sql", + "--exclude-extension", + "test_pg_dump", + "postgres", + ], + }, + "exclude_extension_filter": { + "dump_cmd": [ + "pg_dump", + "--no-sync", + "--file", + f"{tempdir}/exclude_extension_filter.sql", + "--filter", + f"{tempdir}/exclude_extension_filter.txt", + "postgres", + ], + }, + # plpgsql in the list blocks the dump of extension test_pg_dump + "without_extension": { + "dump_cmd": [ + "pg_dump", + "--no-sync", + "--file", + f"{tempdir}/without_extension.sql", + "--extension", + "plpgsql", + "postgres", + ], + }, + # plpgsql in the list of extensions blocks the dump of extension + # test_pg_dump. "public" is the schema used by the extension + # test_pg_dump, but none of its objects should be dumped. 
+ "without_extension_explicit_schema": { + "dump_cmd": [ + "pg_dump", + "--no-sync", + "--file", + f"{tempdir}/without_extension_explicit_schema.sql", + "--extension", + "plpgsql", + "--schema", + "public", + "postgres", + ], + }, + # plpgsql in the list of extensions blocks the dump of extension + # test_pg_dump, but not the dump of objects not dependent on the + # extension located on a schema maintained by the extension. + "without_extension_internal_schema": { + "dump_cmd": [ + "pg_dump", + "--no-sync", + "--file", + f"{tempdir}/without_extension_internal_schema.sql", + "--extension", + "plpgsql", + "--schema", + "regress_pg_dump_schema", + "postgres", + ], + }, + } + + +# Tests which are considered 'full' dumps by pg_dump, but there are flags used +# to exclude specific items (ACLs, LOs, etc). Mirrors %full_runs. +FULL_RUNS: Dict[str, int] = { + "binary_upgrade": 1, + "clean": 1, + "clean_if_exists": 1, + "createdb": 1, + "defaults": 1, + "exclude_table": 1, + "no_privs": 1, + "no_owner": 1, + "privileged_internals": 1, + "with_extension": 1, + "exclude_extension": 1, + "exclude_extension_filter": 1, + "without_extension": 1, +} + + +def _all_run_names() -> Dict[str, int]: + """The set of all run names (mirrors {%pgdump_runs} as a like/unlike set).""" + return {name: 1 for name in _pgdump_runs("")} + + +def _full() -> Dict[str, int]: + """A fresh copy of FULL_RUNS for merging into a test's 'like'.""" + return dict(FULL_RUNS) + + +# --------------------------------------------------------------------------- +# Definition of the tests to run. Mirrors %tests. +# +# Each entry maps a test name (also the log message) to a dict with: 'regexp' +# (compiled), 'like'/'unlike' dicts keyed by run-name or test_key, and optional +# 'create_order' (int) + 'create_sql' (run during setup, ordered by it). 
+# --------------------------------------------------------------------------- + + +def _tests() -> Dict[str, dict]: + """Build the test matrix (mirrors %tests).""" + return { + "ALTER EXTENSION test_pg_dump": { + "create_order": 9, + "create_sql": "ALTER EXTENSION test_pg_dump ADD TABLE regress_pg_dump_table_added;", + "regexp": _qr( + [ + ("rx", r"^"), + ("lit", "CREATE TABLE public.regress_pg_dump_table_added ("), + ("rx", r"\n\s+"), + ("lit", "col1 integer NOT NULL,"), + ("rx", r"\n\s+"), + ("lit", "col2 integer"), + ("rx", r"\n\);\n"), + ], + XM, + ), + "like": {"binary_upgrade": 1}, + }, + "CREATE EXTENSION test_pg_dump": { + "create_order": 2, + "create_sql": "CREATE EXTENSION test_pg_dump;", + "regexp": _qr( + [ + ("rx", r"^"), + ( + "lit", + "CREATE EXTENSION IF NOT EXISTS test_pg_dump WITH SCHEMA public;", + ), + ("rx", r"\n"), + ], + XM, + ), + "like": { + **_full(), + "schema_only": 1, + "section_pre_data": 1, + }, + "unlike": { + "binary_upgrade": 1, + "exclude_extension": 1, + "exclude_extension_filter": 1, + "without_extension": 1, + }, + }, + "CREATE ROLE regress_dump_test_role": { + "create_order": 1, + "create_sql": "CREATE ROLE regress_dump_test_role;", + "regexp": re.compile( + r"^CREATE ROLE regress_dump_test_role;\n", re.MULTILINE + ), + "like": {"pg_dumpall_globals": 1}, + }, + "CREATE ROLE regress_dump_login_role": { + "create_order": 1, + "create_sql": "CREATE ROLE regress_dump_login_role LOGIN;", + "regexp": _qr( + [ + ("rx", r"^"), + ("lit", "CREATE ROLE regress_dump_login_role;"), + ("rx", r"\n"), + ("lit", "ALTER ROLE regress_dump_login_role WITH "), + ("rx", r".*"), + ("lit", " LOGIN "), + ("rx", r".*;"), + ("rx", r"\n"), + ], + XM, + ), + "like": {"pg_dumpall_globals": 1}, + }, + "GRANT ALTER SYSTEM ON PARAMETER full_page_writes TO regress_dump_test_role": { + "create_order": 2, + "create_sql": "GRANT ALTER SYSTEM ON PARAMETER full_page_writes TO regress_dump_test_role;", + "regexp": re.compile( + r"^GRANT ALTER SYSTEM ON PARAMETER 
full_page_writes TO regress_dump_test_role;", + re.MULTILINE, + ), + "like": {"pg_dumpall_globals": 1}, + }, + "GRANT ALL ON PARAMETER Custom.Knob TO regress_dump_test_role WITH GRANT OPTION": { + "create_order": 2, + "create_sql": "GRANT SET, ALTER SYSTEM ON PARAMETER Custom.Knob TO regress_dump_test_role WITH GRANT OPTION;", + # "set" plus "alter system" is "all" privileges on parameters + "regexp": re.compile( + r'^GRANT ALL ON PARAMETER "custom.knob" TO regress_dump_test_role WITH GRANT OPTION;', + re.MULTILINE, + ), + "like": {"pg_dumpall_globals": 1}, + }, + "GRANT ALL ON PARAMETER DateStyle TO regress_dump_test_role": { + "create_order": 2, + "create_sql": 'GRANT ALL ON PARAMETER "DateStyle" TO regress_dump_test_role WITH GRANT OPTION; REVOKE GRANT OPTION FOR ALL ON PARAMETER DateStyle FROM regress_dump_test_role;', + # The revoke simplifies the ultimate grant so as to not include + # "with grant option" + "regexp": re.compile( + r"^GRANT ALL ON PARAMETER datestyle TO regress_dump_test_role;", + re.MULTILINE, + ), + "like": {"pg_dumpall_globals": 1}, + }, + "CREATE SCHEMA public": { + "regexp": re.compile(r"^CREATE SCHEMA public;", re.MULTILINE), + "like": { + "extension_schema": 1, + "without_extension_explicit_schema": 1, + }, + }, + "CREATE SEQUENCE regress_pg_dump_table_col1_seq": { + "regexp": _qr( + [ + ("rx", r"^"), + ("lit", "CREATE SEQUENCE public.regress_pg_dump_table_col1_seq"), + ("rx", r"\n\s+"), + ("lit", "AS integer"), + ("rx", r"\n\s+"), + ("lit", "START WITH 1"), + ("rx", r"\n\s+"), + ("lit", "INCREMENT BY 1"), + ("rx", r"\n\s+"), + ("lit", "NO MINVALUE"), + ("rx", r"\n\s+"), + ("lit", "NO MAXVALUE"), + ("rx", r"\n\s+"), + ("lit", "CACHE 1;"), + ("rx", r"\n"), + ], + XM, + ), + "like": {"binary_upgrade": 1}, + }, + "CREATE TABLE regress_pg_dump_table_added": { + "create_order": 7, + "create_sql": "CREATE TABLE regress_pg_dump_table_added (col1 int not null, col2 int);", + "regexp": _qr( + [ + ("rx", r"^"), + ("lit", "CREATE TABLE 
public.regress_pg_dump_table_added ("), + ("rx", r"\n\s+"), + ("lit", "col1 integer NOT NULL,"), + ("rx", r"\n\s+"), + ("lit", "col2 integer"), + ("rx", r"\n\);\n"), + ], + XM, + ), + "like": {"binary_upgrade": 1}, + }, + "CREATE SEQUENCE regress_pg_dump_seq": { + "regexp": _qr( + [ + ("rx", r"^"), + ("lit", "CREATE SEQUENCE public.regress_pg_dump_seq"), + ("rx", r"\n\s+"), + ("lit", "START WITH 1"), + ("rx", r"\n\s+"), + ("lit", "INCREMENT BY 1"), + ("rx", r"\n\s+"), + ("lit", "NO MINVALUE"), + ("rx", r"\n\s+"), + ("lit", "NO MAXVALUE"), + ("rx", r"\n\s+"), + ("lit", "CACHE 1;"), + ("rx", r"\n"), + ], + XM, + ), + "like": {"binary_upgrade": 1}, + }, + "SETVAL SEQUENCE regress_seq_dumpable": { + "create_order": 6, + "create_sql": "SELECT nextval('regress_seq_dumpable');", + "regexp": _qr( + [ + ("rx", r"^"), + ( + "lit", + "SELECT pg_catalog.setval('public.regress_seq_dumpable', 1, true);", + ), + ("rx", r"\n"), + ], + XM, + ), + "like": { + **_full(), + "data_only": 1, + "section_data": 1, + "extension_schema": 1, + }, + "unlike": { + "exclude_extension": 1, + "exclude_extension_filter": 1, + "without_extension": 1, + }, + }, + "CREATE TABLE regress_pg_dump_table": { + "regexp": _qr( + [ + ("rx", r"^"), + ("lit", "CREATE TABLE public.regress_pg_dump_table ("), + ("rx", r"\n\s+"), + ("lit", "col1 integer NOT NULL,"), + ("rx", r"\n\s+"), + ("lit", "col2 integer,"), + ("rx", r"\n\s+"), + ( + "lit", + "CONSTRAINT regress_pg_dump_table_col2_check CHECK ((col2 > 0))", + ), + ("rx", r"\n\);\n"), + ], + XM, + ), + "like": {"binary_upgrade": 1}, + }, + "COPY public.regress_table_dumpable (col1)": { + "regexp": _qr( + [ + ("rx", r"^"), + ("lit", "COPY public.regress_table_dumpable (col1) FROM stdin;"), + ("rx", r"\n"), + ], + XM, + ), + "like": { + **_full(), + "data_only": 1, + "section_data": 1, + "extension_schema": 1, + }, + "unlike": { + "binary_upgrade": 1, + "exclude_table": 1, + "exclude_extension": 1, + "exclude_extension_filter": 1, + "without_extension": 1, + }, 
+ }, + "REVOKE ALL ON FUNCTION wgo_then_no_access": { + "create_order": 3, + "create_sql": "\n\t\t\tDO $$BEGIN EXECUTE format(\n" + "\t\t\t\t'REVOKE ALL ON FUNCTION wgo_then_no_access()\n" + "\t\t\t\t FROM pg_signal_backend, public, %I',\n" + "\t\t\t\t(SELECT usename\n" + "\t\t\t\t FROM pg_user JOIN pg_proc ON proowner = usesysid\n" + "\t\t\t\t WHERE proname = 'wgo_then_no_access')); END$$;", + "regexp": _qr( + [ + ("rx", r"^"), + ( + "lit", + "REVOKE ALL ON FUNCTION public.wgo_then_no_access() FROM PUBLIC;", + ), + ("rx", r"\n"), + ( + "lit", + "REVOKE ALL ON FUNCTION public.wgo_then_no_access() FROM ", + ), + ("rx", r".*;"), + ("rx", r"\n"), + ( + "lit", + "REVOKE ALL ON FUNCTION public.wgo_then_no_access() FROM pg_signal_backend;", + ), + ("rx", r""), + ], + XM, + ), + "like": { + **_full(), + "schema_only": 1, + "section_pre_data": 1, + }, + "unlike": { + "no_privs": 1, + "exclude_extension": 1, + "exclude_extension_filter": 1, + "without_extension": 1, + }, + }, + "REVOKE GRANT OPTION FOR UPDATE ON SEQUENCE wgo_then_regular": { + "create_order": 3, + "create_sql": "REVOKE GRANT OPTION FOR UPDATE ON SEQUENCE\n" + "\t\t\t\t\t\twgo_then_regular FROM pg_signal_backend;", + "regexp": _qr( + [ + ("rx", r"^"), + ( + "lit", + "REVOKE ALL ON SEQUENCE public.wgo_then_regular FROM pg_signal_backend;", + ), + ("rx", r"\n"), + ( + "lit", + "GRANT SELECT,UPDATE ON SEQUENCE public.wgo_then_regular TO pg_signal_backend;", + ), + ("rx", r"\n"), + ( + "lit", + "GRANT USAGE ON SEQUENCE public.wgo_then_regular TO pg_signal_backend WITH GRANT OPTION;", + ), + ("rx", r""), + ], + XM, + ), + "like": { + **_full(), + "schema_only": 1, + "section_pre_data": 1, + }, + "unlike": { + "no_privs": 1, + "exclude_extension": 1, + "exclude_extension_filter": 1, + "without_extension": 1, + }, + }, + "CREATE ACCESS METHOD regress_test_am": { + "regexp": _qr( + [ + ("rx", r"^"), + ( + "lit", + "CREATE ACCESS METHOD regress_test_am TYPE INDEX HANDLER bthandler;", + ), + ("rx", r"\n"), + ], + XM, 
+ ), + "like": {"binary_upgrade": 1}, + }, + "COMMENT ON EXTENSION test_pg_dump": { + "regexp": _qr( + [ + ("rx", r"^"), + ("lit", "COMMENT ON EXTENSION test_pg_dump "), + ("lit", "IS 'Test pg_dump with an extension';"), + ("rx", r"\n"), + ], + XM, + ), + "like": { + **_full(), + "schema_only": 1, + "section_pre_data": 1, + }, + "unlike": { + "exclude_extension": 1, + "exclude_extension_filter": 1, + "without_extension": 1, + }, + }, + "GRANT SELECT regress_pg_dump_table_added pre-ALTER EXTENSION": { + "create_order": 8, + "create_sql": "GRANT SELECT ON regress_pg_dump_table_added TO regress_dump_test_role;", + "regexp": _qr( + [ + ("rx", r"^"), + ( + "lit", + "GRANT SELECT ON TABLE public.regress_pg_dump_table_added TO regress_dump_test_role;", + ), + ("rx", r"\n"), + ], + XM, + ), + "like": {"binary_upgrade": 1}, + }, + "REVOKE SELECT regress_pg_dump_table_added post-ALTER EXTENSION": { + "create_order": 10, + "create_sql": "REVOKE SELECT ON regress_pg_dump_table_added FROM regress_dump_test_role;", + "regexp": _qr( + [ + ("rx", r"^"), + ( + "lit", + "REVOKE SELECT ON TABLE public.regress_pg_dump_table_added FROM regress_dump_test_role;", + ), + ("rx", r"\n"), + ], + XM, + ), + "like": { + **_full(), + "schema_only": 1, + "section_pre_data": 1, + }, + "unlike": { + "no_privs": 1, + "exclude_extension": 1, + "exclude_extension_filter": 1, + "without_extension": 1, + }, + }, + "GRANT SELECT ON TABLE regress_pg_dump_table": { + "regexp": _qr( + [ + ("rx", r"^"), + ( + "lit", + "SELECT pg_catalog.binary_upgrade_set_record_init_privs(true);", + ), + ("rx", r"\n"), + ( + "lit", + "GRANT SELECT ON TABLE public.regress_pg_dump_table TO regress_dump_test_role;", + ), + ("rx", r"\n"), + ( + "lit", + "SELECT pg_catalog.binary_upgrade_set_record_init_privs(false);", + ), + ("rx", r"\n"), + ], + XMS, + ), + "like": {"binary_upgrade": 1}, + }, + "GRANT SELECT(col1) ON regress_pg_dump_table": { + "regexp": _qr( + [ + ("rx", r"^"), + ( + "lit", + "SELECT 
pg_catalog.binary_upgrade_set_record_init_privs(true);", + ), + ("rx", r"\n"), + ( + "lit", + "GRANT SELECT(col1) ON TABLE public.regress_pg_dump_table TO PUBLIC;", + ), + ("rx", r"\n"), + ( + "lit", + "SELECT pg_catalog.binary_upgrade_set_record_init_privs(false);", + ), + ("rx", r"\n"), + ], + XMS, + ), + "like": {"binary_upgrade": 1}, + }, + "GRANT SELECT(col2) ON regress_pg_dump_table TO regress_dump_test_role": { + "create_order": 4, + "create_sql": "GRANT SELECT(col2) ON regress_pg_dump_table\n" + "\t\t\t\t\t\t TO regress_dump_test_role;", + "regexp": _qr( + [ + ("rx", r"^"), + ( + "lit", + "GRANT SELECT(col2) ON TABLE public.regress_pg_dump_table TO regress_dump_test_role;", + ), + ("rx", r"\n"), + ], + XM, + ), + "like": { + **_full(), + "schema_only": 1, + "section_pre_data": 1, + }, + "unlike": { + "no_privs": 1, + "exclude_extension": 1, + "exclude_extension_filter": 1, + "without_extension": 1, + }, + }, + "GRANT USAGE ON regress_pg_dump_table_col1_seq TO regress_dump_test_role": { + "create_order": 5, + "create_sql": "GRANT USAGE ON SEQUENCE regress_pg_dump_table_col1_seq\n" + "\t\t TO regress_dump_test_role;", + "regexp": _qr( + [ + ("rx", r"^"), + ( + "lit", + "GRANT USAGE ON SEQUENCE public.regress_pg_dump_table_col1_seq TO regress_dump_test_role;", + ), + ("rx", r"\n"), + ], + XM, + ), + "like": { + **_full(), + "schema_only": 1, + "section_pre_data": 1, + }, + "unlike": { + "no_privs": 1, + "exclude_extension": 1, + "exclude_extension_filter": 1, + "without_extension": 1, + }, + }, + "GRANT USAGE ON regress_pg_dump_seq TO regress_dump_test_role": { + "regexp": _qr( + [ + ("rx", r"^"), + ( + "lit", + "GRANT USAGE ON SEQUENCE public.regress_pg_dump_seq TO regress_dump_test_role;", + ), + ("rx", r"\n"), + ], + XM, + ), + "like": {"binary_upgrade": 1}, + }, + "REVOKE SELECT(col1) ON regress_pg_dump_table": { + "create_order": 3, + "create_sql": "REVOKE SELECT(col1) ON regress_pg_dump_table\n" + "\t\t\t\t\t\t FROM PUBLIC;", + "regexp": _qr( + [ + 
("rx", r"^"), + ( + "lit", + "REVOKE SELECT(col1) ON TABLE public.regress_pg_dump_table FROM PUBLIC;", + ), + ("rx", r"\n"), + ], + XM, + ), + "like": { + **_full(), + "schema_only": 1, + "section_pre_data": 1, + }, + "unlike": { + "no_privs": 1, + "exclude_extension": 1, + "exclude_extension_filter": 1, + "without_extension": 1, + }, + }, + # Objects included in extension part of a schema created by this extension + "CREATE TABLE regress_pg_dump_schema.test_table": { + "regexp": _qr( + [ + ("rx", r"^"), + ("lit", "CREATE TABLE regress_pg_dump_schema.test_table ("), + ("rx", r"\n\s+"), + ("lit", "col1 integer,"), + ("rx", r"\n\s+"), + ("lit", "col2 integer,"), + ("rx", r"\n\s+"), + ("lit", "CONSTRAINT test_table_col2_check CHECK ((col2 > 0))"), + ("rx", r"\n\);\n"), + ], + XM, + ), + "like": {"binary_upgrade": 1}, + }, + "GRANT SELECT ON regress_pg_dump_schema.test_table": { + "regexp": _qr( + [ + ("rx", r"^"), + ( + "lit", + "SELECT pg_catalog.binary_upgrade_set_record_init_privs(true);", + ), + ("rx", r"\n"), + ( + "lit", + "GRANT SELECT ON TABLE regress_pg_dump_schema.test_table TO regress_dump_test_role;", + ), + ("rx", r"\n"), + ( + "lit", + "SELECT pg_catalog.binary_upgrade_set_record_init_privs(false);", + ), + ("rx", r"\n"), + ], + XMS, + ), + "like": {"binary_upgrade": 1}, + }, + "CREATE SEQUENCE regress_pg_dump_schema.test_seq": { + "regexp": _qr( + [ + ("rx", r"^"), + ("lit", "CREATE SEQUENCE regress_pg_dump_schema.test_seq"), + ("rx", r"\n\s+"), + ("lit", "START WITH 1"), + ("rx", r"\n\s+"), + ("lit", "INCREMENT BY 1"), + ("rx", r"\n\s+"), + ("lit", "NO MINVALUE"), + ("rx", r"\n\s+"), + ("lit", "NO MAXVALUE"), + ("rx", r"\n\s+"), + ("lit", "CACHE 1;"), + ("rx", r"\n"), + ], + XM, + ), + "like": {"binary_upgrade": 1}, + }, + "GRANT USAGE ON regress_pg_dump_schema.test_seq": { + "regexp": _qr( + [ + ("rx", r"^"), + ( + "lit", + "SELECT pg_catalog.binary_upgrade_set_record_init_privs(true);", + ), + ("rx", r"\n"), + ( + "lit", + "GRANT USAGE ON SEQUENCE 
regress_pg_dump_schema.test_seq TO regress_dump_test_role;", + ), + ("rx", r"\n"), + ( + "lit", + "SELECT pg_catalog.binary_upgrade_set_record_init_privs(false);", + ), + ("rx", r"\n"), + ], + XMS, + ), + "like": {"binary_upgrade": 1}, + }, + "CREATE TYPE regress_pg_dump_schema.test_type": { + "regexp": _qr( + [ + ("rx", r"^"), + ("lit", "CREATE TYPE regress_pg_dump_schema.test_type AS ("), + ("rx", r"\n\s+"), + ("lit", "col1 integer"), + ("rx", r"\n\);\n"), + ], + XM, + ), + "like": {"binary_upgrade": 1}, + }, + "GRANT USAGE ON regress_pg_dump_schema.test_type": { + "regexp": _qr( + [ + ("rx", r"^"), + ( + "lit", + "SELECT pg_catalog.binary_upgrade_set_record_init_privs(true);", + ), + ("rx", r"\n"), + ( + "lit", + "GRANT ALL ON TYPE regress_pg_dump_schema.test_type TO regress_dump_test_role;", + ), + ("rx", r"\n"), + ( + "lit", + "SELECT pg_catalog.binary_upgrade_set_record_init_privs(false);", + ), + ("rx", r"\n"), + ], + XMS, + ), + "like": {"binary_upgrade": 1}, + }, + "CREATE FUNCTION regress_pg_dump_schema.test_func": { + "regexp": _qr( + [ + ("rx", r"^"), + ( + "lit", + "CREATE FUNCTION regress_pg_dump_schema.test_func() RETURNS integer", + ), + ("rx", r"\n\s+"), + ("lit", "LANGUAGE sql"), + ("rx", r"\n"), + ], + XM, + ), + "like": {"binary_upgrade": 1}, + }, + "GRANT ALL ON regress_pg_dump_schema.test_func": { + "regexp": _qr( + [ + ("rx", r"^"), + ( + "lit", + "SELECT pg_catalog.binary_upgrade_set_record_init_privs(true);", + ), + ("rx", r"\n"), + ( + "lit", + "GRANT ALL ON FUNCTION regress_pg_dump_schema.test_func() TO regress_dump_test_role;", + ), + ("rx", r"\n"), + ( + "lit", + "SELECT pg_catalog.binary_upgrade_set_record_init_privs(false);", + ), + ("rx", r"\n"), + ], + XMS, + ), + "like": {"binary_upgrade": 1}, + }, + "CREATE AGGREGATE regress_pg_dump_schema.test_agg": { + "regexp": _qr( + [ + ("rx", r"^"), + ( + "lit", + "CREATE AGGREGATE regress_pg_dump_schema.test_agg(smallint) (", + ), + ("rx", r"\n\s+"), + ("lit", "SFUNC = int2_sum,"), + ("rx", 
r"\n\s+"), + ("lit", "STYPE = bigint"), + ("rx", r"\n\);\n"), + ], + XM, + ), + "like": {"binary_upgrade": 1}, + }, + "GRANT ALL ON regress_pg_dump_schema.test_agg": { + "regexp": _qr( + [ + ("rx", r"^"), + ( + "lit", + "SELECT pg_catalog.binary_upgrade_set_record_init_privs(true);", + ), + ("rx", r"\n"), + ( + "lit", + "GRANT ALL ON FUNCTION regress_pg_dump_schema.test_agg(smallint) TO regress_dump_test_role;", + ), + ("rx", r"\n"), + ( + "lit", + "SELECT pg_catalog.binary_upgrade_set_record_init_privs(false);", + ), + ("rx", r"\n"), + ], + XMS, + ), + "like": {"binary_upgrade": 1}, + }, + "ALTER INDEX pkey DEPENDS ON extension": { + "create_order": 11, + "create_sql": "CREATE TABLE regress_pg_dump_schema.extdependtab (col1 integer primary key, col2 int);\n" + "\t\tCREATE INDEX ON regress_pg_dump_schema.extdependtab (col2);\n" + "\t\tALTER INDEX regress_pg_dump_schema.extdependtab_col2_idx DEPENDS ON EXTENSION test_pg_dump;\n" + "\t\tALTER INDEX regress_pg_dump_schema.extdependtab_pkey DEPENDS ON EXTENSION test_pg_dump;", + "regexp": _qr( + [ + ("rx", r"^"), + ( + "lit", + "ALTER INDEX regress_pg_dump_schema.extdependtab_pkey DEPENDS ON EXTENSION test_pg_dump;", + ), + ("rx", r"\n"), + ], + XMS, + ), + "like": {**_all_run_names()}, + "unlike": { + "data_only": 1, + "extension_schema": 1, + "pg_dumpall_globals": 1, + "privileged_internals": 1, + "section_data": 1, + "section_pre_data": 1, + # Excludes this schema as extension is not listed. + "without_extension_explicit_schema": 1, + }, + }, + "ALTER INDEX idx DEPENDS ON extension": { + "regexp": _qr( + [ + ("rx", r"^"), + ( + "lit", + "ALTER INDEX regress_pg_dump_schema.extdependtab_col2_idx DEPENDS ON EXTENSION test_pg_dump;", + ), + ("rx", r"\n"), + ], + XMS, + ), + "like": {**_all_run_names()}, + "unlike": { + "data_only": 1, + "extension_schema": 1, + "pg_dumpall_globals": 1, + "privileged_internals": 1, + "section_data": 1, + "section_pre_data": 1, + # Excludes this schema as extension is not listed. 
+ "without_extension_explicit_schema": 1, + }, + }, + # Objects not included in extension, part of schema created by extension + "CREATE TABLE regress_pg_dump_schema.external_tab": { + "create_order": 4, + "create_sql": "CREATE TABLE regress_pg_dump_schema.external_tab\n" + "\t\t\t\t\t\t (col1 int);", + "regexp": _qr( + [ + ("rx", r"^"), + ("lit", "CREATE TABLE regress_pg_dump_schema.external_tab ("), + ("rx", r"\n\s+"), + ("lit", "col1 integer"), + ("rx", r"\n\);\n"), + ], + XM, + ), + "like": { + **_full(), + "schema_only": 1, + "section_pre_data": 1, + # Excludes the extension and keeps the schema's data. + "without_extension_internal_schema": 1, + }, + "unlike": {"privileged_internals": 1}, + }, + } + + +def _create_order_key(item: Tuple[str, dict]) -> Tuple[int, int]: + """Sort key reproducing the Perl create_order comparator. + + Tests with create_order come first, ordered numerically; tests without it + follow in their existing (insertion) order, matching the stable sort the + Perl comparator yields for the no-order pairs. 
+ """ + order = item[1].get("create_order") + if order is None: + return (1, 0) + return (0, order) + + +def _build_create_sql(tests: Dict[str, dict]) -> str: + """Concatenate each test's create_sql in create_order (mirrors the driver).""" + create_sql = "" + for _name, test in sorted(tests.items(), key=_create_order_key): + if test.get("create_sql"): + create_sql += test["create_sql"] + return create_sql + + +def _check_test_definitions(tests: Dict[str, dict], test_key: str) -> None: + """Reproduce the Perl die() sanity checks for like/unlike completeness.""" + for name, test in tests.items(): + if test.get("like") is None: + raise AssertionError(f'missing "like" in test "{name}"') + unlike = test.get("unlike") or {} + like = test["like"] + if unlike.get(test_key) and like.get(test_key) is None: + raise AssertionError( + f'useless "unlike" entry "{test_key}" in test "{name}"' + ) + + +def _run_tests_for_output( + run: str, + test_key: str, + output_file: str, + tests: Dict[str, dict], +) -> None: + """Apply every test's like/unlike rule to one run's dumped SQL.""" + _check_test_definitions(tests, test_key) + for name in sorted(tests): + test = tests[name] + like = test["like"] + unlike = test.get("unlike") or {} + regexp: Pattern[str] = test["regexp"] + if like.get(test_key) and unlike.get(test_key) is None: + assert regexp.search( + output_file + ), "{run}: should dump {name}\nReview {run} results".format( + run=run, name=name + ) + else: + assert not regexp.search( + output_file + ), "{run}: should not dump {name}\nReview {run} results".format( + run=run, name=name + ) + + +def test_001_base(create_pg): + """pg_dump/pg_restore matrix against the test_pg_dump extension.""" + with tempfile.TemporaryDirectory(prefix="pg_dump_001_base") as tempdir: + _run_matrix(create_pg, tempdir) + + +def _run_matrix(create_pg, tempdir: str) -> None: + """Seed a server, then execute every run x test pair (mirrors the driver).""" + pgdump_runs = _pgdump_runs(tempdir) + tests = 
def test_001_base(create_pg):
    """Run the pg_dump/pg_restore matrix against the test_pg_dump extension."""
    scratch = tempfile.TemporaryDirectory(prefix="pg_dump_001_base")
    # The directory (and every dump written into it) is removed on exit.
    with scratch as tempdir:
        _run_matrix(create_pg, tempdir)


def _run_matrix(create_pg, tempdir: str) -> None:
    """Seed a server, then check every run's output against every test.

    Args:
        create_pg: Fixture callable that returns a node to dump from.
        tempdir: Directory that receives the dump files and the filter file.
    """
    runs = _pgdump_runs(tempdir)
    tests = _tests()

    # The instance that is actually dumped from.
    node = create_pg("main", auth_extra=["--create-role", "regress_dump_login_role"])

    have_gzip = pypg.check_pg_config(r"#define HAVE_LIBZ 1")

    # Create the schemas, tables, etc. to be dumped, using the combined
    # create statements in create_order.
    node.safe_psql(_build_create_sql(tests))

    # Filter file consumed by the exclude_extension_filter run.
    filter_path = os.path.join(tempdir, "exclude_extension_filter.txt")
    with open(filter_path, "w", encoding="utf-8") as handle:
        handle.write("exclude extension test_pg_dump\n")

    for run in sorted(runs):
        spec = runs[run]

        # gzip-dependent runs are skipped when the build lacks zlib.
        needs_gzip = spec.get("compile_option") == "gzip"
        if needs_gzip and not have_gzip:
            continue

        node.command_ok(spec["dump_cmd"], f"{run}: pg_dump runs")

        restore_cmd: Optional[list] = spec.get("restore_cmd")
        if restore_cmd:
            node.command_ok(restore_cmd, f"{run}: pg_restore runs")

        # A run may borrow another run's like/unlike entries via test_key.
        test_key = spec.get("test_key", run)
        dumped_sql = pypg.slurp_file(os.path.join(tempdir, f"{run}.sql"))

        _run_tests_for_output(run, test_key, dumped_sql, tests)
def test_001_replan_regress(create_pg, tmp_check):
    """Run pg_regress's parallel schedule on a node preloading test_plan_advice."""
    settings = (
        "shared_preload_libraries='test_plan_advice'\n"
        "wal_level=replica\n"
        "pg_plan_advice.always_explain_supplied_advice=false\n"
        "pg_plan_advice.feedback_warnings=true\n"
    )
    node = create_pg("main", start=False)
    node.append_conf(settings)
    node.start()

    # Five levels up from this file's directory is the top of the source tree.
    here = os.path.dirname(__file__)
    srcdir = os.path.abspath(os.path.join(here, "..", "..", "..", "..", ".."))
    dlpath = os.path.dirname(os.environ["REGRESS_SHLIB"])
    outputdir = str(tmp_check)
    inputdir = os.path.join(srcdir, "src", "test", "regress")

    pg_regress = os.environ["PG_REGRESS"]
    options = [
        ("--bindir=", ""),
        ("--dlpath=", dlpath),
        ("--host=", node.host),
        ("--port=", node.port),
        ("--schedule=", os.path.join(inputdir, "parallel_schedule")),
        ("--max-concurrent-tests=", 20),
        ("--inputdir=", inputdir),
        ("--outputdir=", outputdir),
    ]
    cmd = [pg_regress] + [flag + str(value) for flag, value in options]

    completed = subprocess.run(cmd, check=False)
    rc = completed.returncode

    # On failure, print the regression diffs (if any) before asserting.
    diffs = os.path.join(outputdir, "regression.diffs")
    if rc != 0 and os.path.exists(diffs):
        print(f"=== dumping {diffs} ===")
        print(pypg.slurp_file(diffs))
        print("=== EOF ===")

    assert rc == 0, "regression tests pass"
def test_001_saslprep_ranges(create_pg):
    """No successfully prepared codepoint yields an empty result.

    Skipped unless PG_TEST_EXTRA contains the word "saslprep".
    """
    enabled = os.environ.get("PG_TEST_EXTRA", "")
    # An unset or empty variable cannot match, so one search covers both cases.
    if not re.search(r"\bsaslprep\b", enabled):
        pytest.skip("test saslprep not enabled in PG_TEST_EXTRA")

    node = create_pg("main", start=False)
    node.start()
    node.safe_psql("CREATE EXTENSION test_saslprep;")

    # NOTE(review): in SQL, "res IN (NULL, '')" is never true for a NULL res,
    # so only empty-string results are caught here -- confirm this is intended.
    query = (
        "SELECT * FROM test_saslprep_ranges()\n"
        " WHERE status = 'SUCCESS' AND res IN (NULL, '')"
    )
    offending_rows = node.safe_psql(query)
    assert offending_rows == "", "valid codepoints returning an empty password"

    node.stop()
def test_001_late_shmem_alloc(create_pg):
    """Attach counter rises per query when loaded late, not when preloaded."""
    count_sql = "SELECT get_test_shmem_attach_count();"
    node = create_pg("main")

    def read_twice():
        """Return the attach counter from two consecutive safe_psql calls."""
        return node.safe_psql(count_sql), node.safe_psql(count_sql)

    # Phase 1: the module is loaded after startup through CREATE EXTENSION.
    node.safe_psql("CREATE EXTENSION test_shmem;")
    earlier, later = read_twice()
    assert int(later) > int(earlier), "attach callback is called in each backend"

    # Phase 2: restart with the module in shared_preload_libraries.
    node.stop()
    node.append_conf("shared_preload_libraries = 'test_shmem'")
    node.start()

    exec_backend = node.safe_psql("SHOW debug_exec_backend;") == "on"
    earlier, later = read_twice()
    if not exec_backend:
        assert (
            int(earlier) == 0 and int(later) == 0
        ), "attach callback is not called when loaded via shared_preload_libraries"
    else:
        assert int(later) > int(
            earlier
        ), "attach callback is called in each backend when loaded via shared_preload_libraries"

    node.stop()
def test_001_multixact(create_pg):
    """Multixacts recorded around a crash remain readable after restart.

    A background session is paused at the 'multixact-create-from-members'
    injection point while creating a multixact; another session then creates
    one, the server is stopped in immediate mode and restarted, and the two
    recorded multixacts are read back.  Skipped unless the environment says
    injection points are enabled.
    """
    if os.environ.get("enable_injection_points") != "yes":
        pytest.skip("Injection points not supported by this build")

    point = "multixact-create-from-members"

    node = create_pg("main", start=False)
    node.append_conf("shared_preload_libraries = 'test_slru,injection_points'")
    node.start()
    for extension in ("injection_points", "test_slru"):
        node.safe_psql(f"CREATE EXTENSION {extension}")

    # First multixact, created from a long-lived background session.
    session = node.background_psql("postgres")
    multi1 = session.query("SELECT test_create_multixact();")

    # Make the same session stop inside its next multixact creation.
    node.safe_psql(f"SELECT injection_points_attach('{point}','wait');")
    session.query_until(
        "assigning lost multi",
        "\\echo assigning lost multi\n\tSELECT test_create_multixact();",
    )
    node.wait_for_event("client backend", point)
    node.safe_psql(f"SELECT injection_points_detach('{point}')")

    # Second recorded multixact, created while the first session is paused.
    multi2 = node.safe_psql("SELECT test_create_multixact();")

    # Immediate stop and restart, then close the background session.
    node.stop("immediate")
    node.start()
    session.quit()

    recorded = (
        (multi1, "first recorded multi is readable"),
        (multi2, "second recorded multi is readable"),
    )
    for multi, message in recorded:
        assert (
            node.safe_psql(f"SELECT test_read_multixact('{multi}');") == ""
        ), message
+""" + +import os +import re + + +def test_002_multixact_wraparound(create_pg): + """Multixacts created across a forced wraparound stay readable.""" + node = create_pg("main", start=False) + node.append_conf("shared_preload_libraries = 'test_slru'") + pgdata = str(node.datadir) + node.command_ok( + ["pg_resetwal", "--multixact-ids", "0xFFFFFFF8,0xFFFFFFF8", pgdata], + "set the cluster's next multitransaction to 0xFFFFFFF8", + ) + out = node.bin.run_command(["pg_resetwal", "--dry-run", pgdata]).stdout + blcksz = int(re.search(r"^Database block size: *(\d+)$", out, re.M).group(1)) + slru_pages = int(re.search(r"^Pages per SLRU segment: *(\d+)$", out, re.M).group(1)) + offsets_per_page = blcksz // 8 # sizeof(MultiXactOffset) == 8 + segno = int(0xFFFFFFF8 / offsets_per_page / slru_pages) + slru_file = os.path.join(pgdata, "pg_multixact", "offsets", "{:04X}".format(segno)) + bytes_per_seg = slru_pages * blcksz + with open(slru_file, "wb") as fh: + fh.write(b"\0" * bytes_per_seg) + os.unlink(os.path.join(pgdata, "pg_multixact", "offsets", "0000")) + node.start() + node.safe_psql("CREATE EXTENSION test_slru") + multixact_ids = [ + node.safe_psql("SELECT test_create_multixact();") for _ in range(16) + ] + first_multi, last_multi = int(multixact_ids[0]), int(multixact_ids[-1]) + assert ( + last_multi < first_multi + ), "multixact wraparound occurred (first: {}, last: {})".format( + first_multi, last_multi + ) + for i, multi in enumerate(multixact_ids): + assert ( + node.safe_psql("SELECT test_read_multixact('{}');".format(multi)) == "" + ), "multixact {} (ID: {}) is readable after wraparound".format(i, multi) diff --git a/src/test/modules/worker_spi/meson.build b/src/test/modules/worker_spi/meson.build index 6475e23f60173..5c68c2cb959b1 100644 --- a/src/test/modules/worker_spi/meson.build +++ b/src/test/modules/worker_spi/meson.build @@ -25,6 +25,15 @@ tests += { 'name': 'worker_spi', 'sd': meson.current_source_dir(), 'bd': meson.current_build_dir(), + 'pytest': { + 'env': 
{ + 'enable_injection_points': get_option('injection_points') ? 'yes' : 'no', + }, + 'tests': [ + 'pyt/test_001_worker_spi.py', + 'pyt/test_002_worker_terminate.py', + ], + }, 'tap': { 'env': { 'enable_injection_points': get_option('injection_points') ? 'yes' : 'no', diff --git a/src/test/modules/worker_spi/pyt/test_001_worker_spi.py b/src/test/modules/worker_spi/pyt/test_001_worker_spi.py new file mode 100644 index 0000000000000..3d68ce7bf42b9 --- /dev/null +++ b/src/test/modules/worker_spi/pyt/test_001_worker_spi.py @@ -0,0 +1,130 @@ +# Copyright (c) 2022-2026, PostgreSQL Global Development Group + +# pylint: disable=line-too-long,too-many-statements +"""Port of src/test/modules/worker_spi/t/001_worker_spi.pl. + +worker_spi dynamic and preloaded background workers: workers launch via worker_spi_launch and as shared_preload_libraries entries, create their schema/table, perform their periodic work, and respect database/role arguments. +Generated from the Perl original via .agent/gen_golden.py. 
+""" + + +def test_001_worker_spi(create_pg): + """worker_spi dynamic and preloaded background workers.""" + node = create_pg("mynode", start=False) + node.start() + node.safe_psql("CREATE EXTENSION worker_spi;") + result = node.safe_psql("SELECT worker_spi_launch(4) IS NOT NULL;") + assert result == "t", "dynamic bgworker launched" + node.poll_query_until( + "SELECT count(*) > 0 FROM information_schema.tables\n\t WHERE table_schema = 'schema4' AND table_name = 'counted';" + ) + node.safe_psql("INSERT INTO schema4.counted VALUES ('total', 0), ('delta', 1);") + node.reload() + node.poll_query_until( + "SELECT count(*) FROM schema4.counted WHERE type = 'delta';", expected="0" + ) + result = node.safe_psql("SELECT * FROM schema4.counted;") + assert result == "total|1", "dynamic bgworker correctly consumed tuple data" + result = node.poll_query_until( + "SELECT wait_event FROM pg_stat_activity WHERE backend_type ~ 'worker_spi';", + expected="WorkerSpiMain", + ) + assert result, 'dynamic bgworker has reported "WorkerSpiMain" as wait event' + result = node.safe_psql( + "SELECT count(*) > 0 from pg_wait_events where type = 'Extension' and name = 'WorkerSpiMain';" + ) + assert result == "t", '"WorkerSpiMain" is reported in pg_wait_events' + node.safe_psql("CREATE DATABASE mydb;") + node.safe_psql("CREATE ROLE myrole SUPERUSER LOGIN;") + node.safe_psql("CREATE EXTENSION worker_spi;", dbname="mydb") + node.append_conf( + "\nshared_preload_libraries = 'worker_spi'\nworker_spi.database = 'mydb'\nworker_spi.total_workers = 3\nmax_worker_processes = 32\n" + ) + node.restart() + assert node.poll_query_until( + "SELECT datname, count(datname), wait_event FROM pg_stat_activity\n WHERE backend_type = 'worker_spi' GROUP BY datname, wait_event;", + expected="mydb|3|WorkerSpiMain", + ), "poll_query_until" + myrole_id = node.safe_psql( + "SELECT oid FROM pg_roles where rolname = 'myrole';", dbname="mydb" + ) + mydb_id = node.safe_psql( + "SELECT oid FROM pg_database where datname = 'mydb';",
dbname="mydb" + ) + postgresdb_id = node.safe_psql( + "SELECT oid FROM pg_database where datname = 'postgres';", dbname="mydb" + ) + worker1_pid = node.safe_psql( + "SELECT worker_spi_launch(10, " + str(mydb_id) + ", " + str(myrole_id) + ");", + dbname="mydb", + ) + worker2_pid = node.safe_psql( + "SELECT worker_spi_launch(11, " + + str(postgresdb_id) + + ", " + + str(myrole_id) + + ");", + dbname="mydb", + ) + assert node.poll_query_until( + "SELECT datname, usename, wait_event FROM pg_stat_activity\n WHERE backend_type = 'worker_spi dynamic' AND\n pid IN (" + + str(worker1_pid) + + ", " + + str(worker2_pid) + + ") ORDER BY datname;", + expected="mydb|myrole|WorkerSpiMain\npostgres|myrole|WorkerSpiMain", + ), "poll_query_until" + node.safe_psql("CREATE DATABASE noconndb ALLOW_CONNECTIONS false;") + noconndb_id = node.safe_psql( + "SELECT oid FROM pg_database where datname = 'noconndb';", dbname="mydb" + ) + log_offset = node.current_log_position() + node.psql_capture( + "SELECT worker_spi_launch(12, " + + str(noconndb_id) + + ", " + + str(myrole_id) + + ");" + ) + node.wait_for_log( + r"""database "noconndb" is not currently accepting connections""", log_offset + ) + worker4_pid = node.safe_psql( + "SELECT worker_spi_launch(12, " + + str(noconndb_id) + + ", " + + str(myrole_id) + + ", '{\"ALLOWCONN\"}');" + ) + assert node.poll_query_until( + "SELECT datname, usename, wait_event FROM pg_stat_activity\n WHERE backend_type = 'worker_spi dynamic' AND\n pid IN (" + + str(worker4_pid) + + ") ORDER BY datname;", + expected="noconndb|myrole|WorkerSpiMain", + ), "poll_query_until" + node.safe_psql( + "CREATE ROLE nologrole WITH NOLOGIN;\n GRANT CREATE ON DATABASE mydb TO nologrole;" + ) + nologrole_id = node.safe_psql( + "SELECT oid FROM pg_roles where rolname = 'nologrole';", dbname="mydb" + ) + log_offset = node.current_log_position() + node.psql_capture( + "SELECT worker_spi_launch(13, " + str(mydb_id) + ", " + str(nologrole_id) + ");" + ) + node.wait_for_log(r"""role 
"nologrole" is not permitted to log in""", log_offset) + log_offset = node.current_log_position() + worker5_pid = node.safe_psql( + "SELECT worker_spi_launch(13, " + + str(mydb_id) + + ", " + + str(nologrole_id) + + ", '{\"ROLELOGINCHECK\"}');", + dbname="mydb", + ) + assert node.poll_query_until( + "SELECT datname, usename, wait_event FROM pg_stat_activity\n WHERE backend_type = 'worker_spi dynamic' AND\n pid = " + + str(worker5_pid) + + ";", + expected="mydb|nologrole|WorkerSpiMain", + ), "poll_query_until" diff --git a/src/test/modules/worker_spi/pyt/test_002_worker_terminate.py b/src/test/modules/worker_spi/pyt/test_002_worker_terminate.py new file mode 100644 index 0000000000000..bd522237ffb97 --- /dev/null +++ b/src/test/modules/worker_spi/pyt/test_002_worker_terminate.py @@ -0,0 +1,107 @@ +# Copyright (c) 2024-2026, PostgreSQL Global Development Group + +"""Port of src/test/modules/worker_spi/t/002_worker_terminate.pl. + +A non-interruptible worker_spi background worker connected to a database blocks +CREATE DATABASE ... WITH TEMPLATE of that database; an interruptible worker is +instead terminated by administrator commands that need exclusive access +(CREATE DATABASE WITH TEMPLATE, ALTER DATABASE RENAME/SET TABLESPACE, DROP +DATABASE), each logging the termination and the worker's exit. Requires an +injection-points build. 
+""" + +import os +import re +import tempfile + +import pytest + + +def _launch_bgworker(node, database, testcase, interruptible): + pid = node.safe_psql( + "SELECT worker_spi_launch({}, '{}'::regdatabase, 0, '{{}}', {});".format( + testcase, database, interruptible + ) + ) + assert node.poll_query_until( + "SELECT wait_event FROM pg_stat_activity WHERE pid = {};".format(pid), + "WorkerSpiMain", + ), "dynamic bgworker {} launched".format(testcase) + return pid + + +def _run_interruptible(node, command, test_name, pid): + offset = node.current_log_position() + node.safe_psql(command) + node.wait_for_log( + r'terminating background worker "worker_spi dynamic" due to ' + r"administrator command", + offset, + ) + node.wait_for_log( + r'LOG: .*background worker "worker_spi dynamic" \(PID {}\) exited with ' + r"exit code".format(pid), + offset, + ) + assert ( + node.safe_psql( + "SELECT count(*) = 0 FROM pg_stat_activity WHERE pid = {};".format(pid) + ) + == "t" + ), "dynamic bgworker stopped for {}".format(test_name) + + +def test_002_worker_terminate(create_pg): + """worker_spi bgworkers block or are terminated by exclusive DB commands.""" + if os.environ.get("enable_injection_points") != "yes": + pytest.skip("Injection points not supported by this build") + node = create_pg("mynode", start=False) + node.append_conf( + "\nautovacuum = off\ndebug_parallel_query = off\nlog_min_messages = debug1\n" + "worker_spi.naptime = 600\n" + ) + node.start() + if not node.check_extension("injection_points"): + pytest.skip("Extension injection_points not installed") + node.safe_psql("CREATE EXTENSION worker_spi;") + _launch_bgworker(node, "postgres", 0, "false") + node.safe_psql("CREATE EXTENSION injection_points;") + node.safe_psql("SELECT injection_points_attach('procarray-reduce-count', 'error');") + res = node.psql_capture("CREATE DATABASE testdb WITH TEMPLATE postgres") + assert re.search( + r'source database "postgres" is being accessed by other users', res.stderr + ), 
"background worker blocked the database creation" + assert ( + node.safe_psql( + "SELECT count(1) FROM pg_stat_activity WHERE backend_type = " + "'worker_spi dynamic';" + ) + == "1" + ), "background worker still running after CREATE DATABASE WITH TEMPLATE" + node.safe_psql( + "SELECT pg_terminate_backend(pid) FROM pg_stat_activity WHERE " + "backend_type = 'worker_spi dynamic';" + ) + node.safe_psql("SELECT injection_points_detach('procarray-reduce-count');") + pid = _launch_bgworker(node, "postgres", 1, "true") + _run_interruptible( + node, + "CREATE DATABASE testdb WITH TEMPLATE postgres", + "CREATE DATABASE WITH TEMPLATE", + pid, + ) + pid = _launch_bgworker(node, "testdb", 2, "true") + _run_interruptible( + node, "ALTER DATABASE testdb RENAME TO renameddb", "ALTER DATABASE RENAME", pid + ) + tablespace = tempfile.mkdtemp(prefix="ts_") + node.safe_psql("CREATE TABLESPACE test_tablespace LOCATION '{}'".format(tablespace)) + pid = _launch_bgworker(node, "renameddb", 3, "true") + _run_interruptible( + node, + "ALTER DATABASE renameddb SET TABLESPACE test_tablespace", + "ALTER DATABASE SET TABLESPACE", + pid, + ) + pid = _launch_bgworker(node, "renameddb", 4, "true") + _run_interruptible(node, "DROP DATABASE renameddb", "DROP DATABASE", pid) diff --git a/src/test/modules/xid_wraparound/meson.build b/src/test/modules/xid_wraparound/meson.build index 97ce670f9ac21..e68930c05ae0c 100644 --- a/src/test/modules/xid_wraparound/meson.build +++ b/src/test/modules/xid_wraparound/meson.build @@ -33,4 +33,12 @@ tests += { 't/004_notify_freeze.pl', ], }, + 'pytest': { + 'tests': [ + 'pyt/test_001_emergency_vacuum.py', + 'pyt/test_002_limits.py', + 'pyt/test_003_wraparounds.py', + 'pyt/test_004_notify_freeze.py', + ], + }, } diff --git a/src/test/modules/xid_wraparound/pyt/test_001_emergency_vacuum.py b/src/test/modules/xid_wraparound/pyt/test_001_emergency_vacuum.py new file mode 100644 index 0000000000000..c9e3ec200a4a7 --- /dev/null +++ 
b/src/test/modules/xid_wraparound/pyt/test_001_emergency_vacuum.py @@ -0,0 +1,103 @@ +# Copyright (c) 2023-2026, PostgreSQL Global Development Group + +# pylint: disable=line-too-long +"""Port of src/test/modules/xid_wraparound/t/001_emergency_vacuum.pl. + +Emergency (failsafe) autovacuum: with a long-running transaction pinning the +xmin horizon, consume XIDs until every database ages past vacuum_failsafe_age. +Once the old transaction commits, autovacuum must vacuum every table and log the +failsafe "bypassing nonessential maintenance" message for each table. Gated on +PG_TEST_EXTRA=xid_wraparound (slow: consumes ~2 billion XIDs). +""" + +import re + +import pypg + +pytestmark = pypg.require_test_extras("xid_wraparound") + + +def test_001_emergency_vacuum(create_pg): + """Failsafe autovacuum triggers and vacuums all tables past failsafe age.""" + node = create_pg("main", start=False) + node.append_conf( + "\n" + "autovacuum_naptime = 1s\n" + "autovacuum_max_workers = 1\n" + "log_autovacuum_min_duration = 0\n" + ) + node.start() + node.safe_psql("CREATE EXTENSION xid_wraparound") + node.safe_psql( + """ +CREATE TABLE large(id serial primary key, data text, filler text default repeat(random()::text, 10)) + WITH (autovacuum_enabled = off); +INSERT INTO large(data) SELECT generate_series(1,30000); +CREATE TABLE large_trunc(id serial primary key, data text, filler text default repeat(random()::text, 10)) + WITH (autovacuum_enabled = off); +INSERT INTO large_trunc(data) SELECT generate_series(1,30000); +CREATE TABLE small(id serial primary key, data text, filler text default repeat(random()::text, 10)) + WITH (autovacuum_enabled = off); +INSERT INTO small(data) SELECT generate_series(1,15000); +CREATE TABLE small_trunc(id serial primary key, data text, filler text default repeat(random()::text, 10)) + WITH (autovacuum_enabled = off); +INSERT INTO small_trunc(data) SELECT generate_series(1,15000); +""" + ) + psql_timeout_secs = 4 * pypg.test_timeout_default() + 
background_psql = node.background_psql( + "postgres", on_error_stop=False, timeout=psql_timeout_secs + ) + background_psql.set_query_timer_restart() + background_psql.query_safe( + """ + BEGIN; + DELETE FROM large WHERE id % 2 = 0; + DELETE FROM large_trunc WHERE id > 10000; + DELETE FROM small WHERE id % 2 = 0; + DELETE FROM small_trunc WHERE id > 1000; +""" + ) + node.safe_psql("SELECT consume_xids_until('2000000000'::xid8)") + node.safe_psql("INSERT INTO small(data) SELECT 1") + ret = node.safe_psql( + """ +SELECT datname, + age(datfrozenxid) > current_setting('vacuum_failsafe_age')::int as old +FROM pg_database ORDER BY 1 +""" + ) + assert ret == "postgres|t\ntemplate0|t\ntemplate1|t", "all tables became old" + log_offset = node.current_log_position() + background_psql.query_safe("COMMIT") + background_psql.quit() + assert node.poll_query_until( + """ +SELECT NOT EXISTS ( + SELECT * + FROM pg_database + WHERE age(datfrozenxid) > current_setting('autovacuum_freeze_max_age')::int) +""" + ), "timeout waiting for all databases to be vacuumed" + ret = node.safe_psql( + """ +SELECT relname, age(relfrozenxid) > current_setting('autovacuum_freeze_max_age')::int +FROM pg_class +WHERE relname IN ('large', 'large_trunc', 'small', 'small_trunc') +ORDER BY 1 +""" + ) + assert ret == ( + "large|f\nlarge_trunc|f\nsmall|f\nsmall_trunc|f" + ), "all tables are vacuumed" + log_contents = pypg.slurp_file(node.log, log_offset) + for tablename in ("large", "large_trunc", "small", "small_trunc"): + assert re.search( + r'bypassing nonessential maintenance of table "postgres\.public\.' 
+ + tablename + + r'" as a failsafe after \d+ index scans', + log_contents, + ), ( + "failsafe vacuum triggered for " + tablename + ) + node.stop() diff --git a/src/test/modules/xid_wraparound/pyt/test_002_limits.py b/src/test/modules/xid_wraparound/pyt/test_002_limits.py new file mode 100644 index 0000000000000..5a69b229c7015 --- /dev/null +++ b/src/test/modules/xid_wraparound/pyt/test_002_limits.py @@ -0,0 +1,69 @@ +# Copyright (c) 2023-2026, PostgreSQL Global Development Group + +# pylint: disable=line-too-long +"""Port of src/test/modules/xid_wraparound/t/002_limits.pl. + +XID exhaustion limits: with an old open transaction pinning the horizon, +consume XIDs until the server emits the "must be vacuumed within N +transactions" warning, then the "not accepting commands ... to avoid wraparound +data loss" stop-limit error. After the old transaction commits and VACUUM runs, +inserts succeed again. Gated on PG_TEST_EXTRA=xid_wraparound (slow). +""" + +import re + +import pypg + +pytestmark = pypg.require_test_extras("xid_wraparound") + + +def test_002_limits(create_pg): + """XID warn-limit and stop-limit fire, then VACUUM restores write access.""" + node = create_pg("wraparound", start=False) + node.append_conf("\nautovacuum_naptime = 1s\nlog_autovacuum_min_duration = 0\n") + node.start() + node.safe_psql("CREATE EXTENSION xid_wraparound") + node.safe_psql( + "\nCREATE TABLE wraparoundtest(t text) WITH (autovacuum_enabled = off);\n" + "INSERT INTO wraparoundtest VALUES ('start');\n" + ) + psql_timeout_secs = 4 * pypg.test_timeout_default() + background_psql = node.background_psql( + "postgres", on_error_stop=False, timeout=psql_timeout_secs + ) + background_psql.query_safe( + "\n\tBEGIN;\n\tINSERT INTO wraparoundtest VALUES ('oldxact');\n" + ) + node.safe_psql("SELECT consume_xids(1000000000)") + node.safe_psql("INSERT INTO wraparoundtest VALUES ('after 1 billion')") + node.safe_psql("SELECT consume_xids(1000000000)") + node.safe_psql("INSERT INTO wraparoundtest 
VALUES ('after 2 billion')") + warn_limit = 0 + for _ in range(1, 16): + res = node.psql_capture("SELECT consume_xids(10000000)") + assert res.rc == 0 # on_error_die => 1 + if re.search( + r'WARNING: database "postgres" must be vacuumed within [0-9]+ transactions', + res.stderr, + ): + warn_limit = 1 + break + assert warn_limit == 1, "warn-limit reached" + node.safe_psql("INSERT INTO wraparoundtest VALUES ('reached warn-limit')") + res = node.psql_capture("SELECT consume_xids(100000000)") + assert re.search( + r'ERROR: database is not accepting commands that assign new transaction IDs to avoid wraparound data loss in database "postgres"', + res.stderr, + ), "stop-limit" + background_psql.query_safe("COMMIT") + background_psql.quit() + node.safe_psql("VACUUM") + assert node.poll_query_until( + "INSERT INTO wraparoundtest VALUES ('after VACUUM')", "INSERT 0 1" + ) + ret = node.safe_psql("SELECT * from wraparoundtest") + assert ret == ( + "start\noldxact\nafter 1 billion\nafter 2 billion\n" + "reached warn-limit\nafter VACUUM" + ) + node.stop() diff --git a/src/test/modules/xid_wraparound/pyt/test_003_wraparounds.py b/src/test/modules/xid_wraparound/pyt/test_003_wraparounds.py new file mode 100644 index 0000000000000..0f6ce9154d0a4 --- /dev/null +++ b/src/test/modules/xid_wraparound/pyt/test_003_wraparounds.py @@ -0,0 +1,39 @@ +# Copyright (c) 2023-2026, PostgreSQL Global Development Group + +"""Port of src/test/modules/xid_wraparound/t/003_wraparounds.pl. + +Full XID-space wraparound: consume 100 batches of 100 million XIDs (~10 billion +total, several wraparounds) while autovacuum keeps the cluster alive, inserting +a marker row after each batch. All 101 rows (the initial plus 100 markers) must +survive. Gated on PG_TEST_EXTRA=xid_wraparound (very slow). 
+""" + +import pypg + +pytestmark = pypg.require_test_extras("xid_wraparound") + + +def test_003_wraparounds(create_pg): + """The cluster survives repeated XID wraparounds; all marker rows persist.""" + node = create_pg("wraparound", start=False) + node.append_conf( + "\n" + "autovacuum_naptime = 1s\n" + "autovacuum_max_workers = 1\n" + "log_autovacuum_min_duration = 0\n" + ) + node.start() + node.safe_psql("CREATE EXTENSION xid_wraparound") + node.safe_psql( + "\nCREATE TABLE wraparoundtest(t text) WITH (autovacuum_enabled = off);\n" + "INSERT INTO wraparoundtest VALUES ('beginning');\n" + ) + psql_timeout_secs = 4 * pypg.test_timeout_default() + for i in range(1, 101): + node.safe_psql("SELECT consume_xids(100000000)", timeout=psql_timeout_secs) + node.safe_psql( + "INSERT INTO wraparoundtest VALUES ('after {} batches')".format(i) + ) + ret = node.safe_psql("SELECT COUNT(*) FROM wraparoundtest") + assert ret == "101" + node.stop() diff --git a/src/test/modules/xid_wraparound/pyt/test_004_notify_freeze.py b/src/test/modules/xid_wraparound/pyt/test_004_notify_freeze.py new file mode 100644 index 0000000000000..1236f919e5593 --- /dev/null +++ b/src/test/modules/xid_wraparound/pyt/test_004_notify_freeze.py @@ -0,0 +1,56 @@ +# Copyright (c) 2023-2026, PostgreSQL Global Development Group + +"""Port of src/test/modules/xid_wraparound/t/004_notify_freeze.pl. + +Pending async notifications survive XID freezing: a session LISTENs and holds an +open transaction while 10 NOTIFYs are queued and XIDs are consumed, then +vacuumdb --all --freeze advances datfrozenxid. After the listening session +commits, all 10 notifications must still be delivered in order. Gated on +PG_TEST_EXTRA=xid_wraparound. 
+""" + +import re + +import pypg + +pytestmark = pypg.require_test_extras("xid_wraparound") + + +def test_004_notify_freeze(create_pg): + """Queued notifications survive --freeze and are all delivered on commit.""" + node = create_pg("node") + node.safe_psql("CREATE EXTENSION xid_wraparound") + node.safe_psql("ALTER DATABASE template0 WITH ALLOW_CONNECTIONS true") + psql_session1 = node.background_psql("postgres") + psql_session1.query_safe("listen s;") + psql_session1.query_safe("begin;") + for i in range(1, 11): + node.safe_psql("NOTIFY s, '{}'".format(i)) + node.safe_psql("select consume_xids(10000000);") + node.safe_psql("select txid_current()") + datafronzenxid = node.safe_psql( + "select min(datfrozenxid::text::bigint) from pg_database" + ) + node.command_ok( + ["vacuumdb", "--all", "--freeze", "--port", str(node.port)], + "vacuumdb --all --freeze", + ) + datafronzenxid_freeze = node.safe_psql( + "select min(datfrozenxid::text::bigint) from pg_database" + ) + assert int(datafronzenxid_freeze) > int(datafronzenxid), "datfrozenxid advanced" + res = psql_session1.query_safe("commit;") + lines = res.split("\n") + while lines and lines[-1] == "": + lines.pop() + notifications_count = 0 + for line in lines: + notifications_count += 1 + assert re.search( + r'Asynchronous notification "s" with payload "{}" received'.format( + notifications_count + ), + line, + ) + assert notifications_count == 10, "received all committed notifications" + node.stop() diff --git a/src/test/pytest/pypg/oauthserver.py b/src/test/pytest/pypg/oauthserver.py new file mode 100644 index 0000000000000..ca32a37a12872 --- /dev/null +++ b/src/test/pytest/pypg/oauthserver.py @@ -0,0 +1,91 @@ +# Copyright (c) 2025-2026, PostgreSQL Global Development Group + +"""Python port of src/test/modules/oauth_validator/t/OAuth/Server.pm. + +Glue between the pytest tests and the mock OAuth authorization server daemon +implemented in t/oauth_server.py. 
The daemon serves HTTPS on 127.0.0.1 (IPv4 +only) using the SSL certificates under cert_dir; libpq must point PGOAUTHCAFILE +at the matching CA. The daemon prints its ephemeral port number to stdout and +then closes stdout, so the parent reads to EOF to learn the port (mirroring the +popen()-based handshake in Server.pm). +""" + +import os +import pathlib +import signal +import subprocess +import sys +from typing import Optional + + +class OAuthServer: + """Runs the mock OAuth authorization server daemon for a test module. + + Mirrors OAuth::Server: run() launches t/oauth_server.py, captures the + advertised port, and stop() sends SIGTERM and waits for the daemon to exit. + """ + + def __init__(self) -> None: + self._proc: Optional[subprocess.Popen] = None + self._port: Optional[int] = None + + @property + def port(self) -> int: + """Return the port the daemon is listening on (set by run()).""" + if self._port is None: + raise RuntimeError("OAuth server has not been started") + return self._port + + def run(self) -> None: + """Launch the authorization server daemon in t/oauth_server.py. + + Uses the PYTHON interpreter from the environment when set (as + Server.pm does), falling back to the interpreter running the tests. + The daemon prints its port to stdout and then closes stdout; we read + the entire stream to obtain the port number. 
+ """ + script = ( + pathlib.Path(__file__).resolve().parents[4] + / "src" + / "test" + / "modules" + / "oauth_validator" + / "t" + / "oauth_server.py" + ) + python = os.environ.get("PYTHON") or sys.executable + + # pylint: disable-next=consider-using-with + self._proc = subprocess.Popen( + [python, str(script)], + stdout=subprocess.PIPE, + encoding="utf-8", + ) + + assert self._proc.stdout is not None + line = self._proc.stdout.read() + if not line: + raise RuntimeError("failed to read port number from OAuth server") + + text = line.strip() + if not text.isdigit(): + raise RuntimeError( + "OAuth server did not advertise a valid port: {!r}".format(text) + ) + self._port = int(text) + + def stop(self) -> None: + """Send SIGTERM to the daemon and wait for it to exit. + + Idempotent: a second call (or a call before run()) is a no-op, matching + the END-block guard in the Perl tests. + """ + if self._proc is None: + return + + self._proc.send_signal(signal.SIGTERM) + if self._proc.stdout is not None: + self._proc.stdout.close() + self._proc.wait() + self._proc = None + self._port = None From 52ca526b2c036956bcf2640be21636561231fbd5 Mon Sep 17 00:00:00 2001 From: Greg Burd Date: Sun, 14 Jun 2026 11:16:34 -0400 Subject: [PATCH 16/36] pytest: port authentication, SSL, LDAP, and Kerberos TAP suites Port src/test/authentication, src/test/ssl, src/test/ldap, and src/test/kerberos to pytest, including the mock LDAP and SSL helper servers and the Kerberos KDC harness. SSL cert/key paths and service-file paths are written with forward slashes so the conninfo strings parse on Windows. 
Co-authored-by: Andrew Dunstan Co-authored-by: Greg Burd --- src/test/authentication/meson.build | 14 + .../authentication/pyt/test_001_password.py | 862 ++++++++++++++++ .../authentication/pyt/test_002_saslprep.py | 72 ++ src/test/authentication/pyt/test_003_peer.py | 361 +++++++ .../pyt/test_004_file_inclusion.py | 295 ++++++ src/test/authentication/pyt/test_005_sspi.py | 41 + .../pyt/test_006_login_trigger.py | 150 +++ .../authentication/pyt/test_007_pre_auth.py | 51 + src/test/kerberos/meson.build | 11 + src/test/kerberos/pyt/test_001_auth.py | 716 +++++++++++++ src/test/ldap/meson.build | 10 + src/test/ldap/pyt/conftest.py | 13 + src/test/ldap/pyt/test_001_auth.py | 386 +++++++ src/test/ldap/pyt/test_002_bindpasswd.py | 97 ++ .../test_003_ldap_connection_param_lookup.py | 193 ++++ src/test/pytest/pypg/ldapserver.py | 432 ++++++++ src/test/pytest/pypg/ssl_server.py | 329 ++++++ src/test/ssl/meson.build | 8 + src/test/ssl/pyt/conftest.py | 6 +- src/test/ssl/pyt/test_001_ssltests.py | 970 ++++++++++++++++++ src/test/ssl/pyt/test_002_scram.py | 168 +++ src/test/ssl/pyt/test_003_sslinfo.py | 181 ++++ src/test/ssl/pyt/test_004_sni.py | 457 +++++++++ src/test/ssl/pyt/test_client.py | 1 + src/test/ssl/pyt/test_server.py | 1 + 25 files changed, 5823 insertions(+), 2 deletions(-) create mode 100644 src/test/authentication/pyt/test_001_password.py create mode 100644 src/test/authentication/pyt/test_002_saslprep.py create mode 100644 src/test/authentication/pyt/test_003_peer.py create mode 100644 src/test/authentication/pyt/test_004_file_inclusion.py create mode 100644 src/test/authentication/pyt/test_005_sspi.py create mode 100644 src/test/authentication/pyt/test_006_login_trigger.py create mode 100644 src/test/authentication/pyt/test_007_pre_auth.py create mode 100644 src/test/kerberos/pyt/test_001_auth.py create mode 100644 src/test/ldap/pyt/conftest.py create mode 100644 src/test/ldap/pyt/test_001_auth.py create mode 100644 src/test/ldap/pyt/test_002_bindpasswd.py 
create mode 100644 src/test/ldap/pyt/test_003_ldap_connection_param_lookup.py create mode 100644 src/test/pytest/pypg/ldapserver.py create mode 100644 src/test/pytest/pypg/ssl_server.py create mode 100644 src/test/ssl/pyt/test_001_ssltests.py create mode 100644 src/test/ssl/pyt/test_002_scram.py create mode 100644 src/test/ssl/pyt/test_003_sslinfo.py create mode 100644 src/test/ssl/pyt/test_004_sni.py diff --git a/src/test/authentication/meson.build b/src/test/authentication/meson.build index 282a5054e2ceb..d8a7875d362f5 100644 --- a/src/test/authentication/meson.build +++ b/src/test/authentication/meson.build @@ -18,4 +18,18 @@ tests += { 't/007_pre_auth.pl', ], }, + 'pytest': { + 'env': { + 'enable_injection_points': get_option('injection_points') ? 'yes' : 'no', + }, + 'tests': [ + 'pyt/test_001_password.py', + 'pyt/test_002_saslprep.py', + 'pyt/test_003_peer.py', + 'pyt/test_004_file_inclusion.py', + 'pyt/test_006_login_trigger.py', + 'pyt/test_005_sspi.py', + 'pyt/test_007_pre_auth.py', + ], + }, } diff --git a/src/test/authentication/pyt/test_001_password.py b/src/test/authentication/pyt/test_001_password.py new file mode 100644 index 0000000000000..45f78995ce8eb --- /dev/null +++ b/src/test/authentication/pyt/test_001_password.py @@ -0,0 +1,862 @@ +# Copyright (c) 2021-2026, PostgreSQL Global Development Group + +"""Port of src/test/authentication/t/001_password.pl. + +Set of tests for authentication and pg_hba.conf, exercising the password +methods Plain, MD5-encrypted and SCRAM-encrypted, plus the require_auth +connection option, the log_connections GUC, SYSTEM_USER (including with parallel +workers), password expiration warnings, channel binding without SSL, .pgpass +processing, regular-expression matching for user/database names in pg_hba.conf, +and role membership policies (+role, samerole, samegroup). Requires +Unix-domain sockets. 
+""" + +import os +import re +import sys +import time + +import pytest + + +def _reset_pg_hba(node, database, role, hba_method): + """Delete pg_hba.conf and write a single 'local ' rule. + + The rule deliberately uses a continuation line (backslash-newline) to + exercise that parser path, mirroring the Perl helper, then reloads. + """ + (node.datadir / "pg_hba.conf").unlink(missing_ok=True) + node.append_conf( + "local {} {}\\\n {}".format(database, role, hba_method), + filename="pg_hba.conf", + ) + node.reload() + + +def _test_conn( + node, + connstr, + method, + expected_res, + *, + log_like=None, + log_unlike=None, + expected_stderr=None, +): + """Connect with connstr; assert success (0) or failure (else) and log match. + + For failures only the status code (and optional log match) is checked, as in + the Perl test_conn helper; expected_stderr applies to the success case. + """ + status_string = "success" if expected_res == 0 else "failed" + testname = "authentication {} for method {}, connstr {}".format( + status_string, method, connstr + ) + if expected_res == 0: + node.connect_ok( + connstr, + testname, + expected_stderr=expected_stderr, + log_like=log_like, + log_unlike=log_unlike, + ) + else: + node.connect_fails(connstr, testname, log_like=log_like, log_unlike=log_unlike) + + +def _set_pgpassword(value): + if value is None: + os.environ.pop("PGPASSWORD", None) + else: + os.environ["PGPASSWORD"] = value + + +def _setup_roles(node): + """Create roles for the password_expiration_warning_threshold tests.""" + current_year = time.localtime().tm_year + expire_year = current_year - 1 + node.safe_psql( + "CREATE ROLE expired LOGIN VALID UNTIL '{}-01-01' PASSWORD 'pass'".format( + expire_year + ) + ) + expire_year = current_year + 2 + node.safe_psql( + "CREATE ROLE expiration_warnings LOGIN VALID UNTIL '{}-01-01' " + "PASSWORD 'pass'".format(expire_year) + ) + expire_year = current_year + 5 + node.safe_psql( + "CREATE ROLE no_warnings LOGIN VALID UNTIL '{}-01-01' " 
+ "PASSWORD 'pass'".format(expire_year) + ) + + +def _test_log_connections(node): + """Test behavior of the log_connections GUC.""" + node.safe_psql("CREATE DATABASE test_log_connections") + + log_connections = node.safe_psql( + "SHOW log_connections;", dbname="test_log_connections" + ) + assert log_connections == "on", "check log connections has expected value 'on'" + + node.connect_ok( + "test_log_connections", + "log_connections 'on' works as expected for backwards compatibility", + log_like=[ + r"connection received", + r"connection authenticated", + r"connection authorized: user=\S+ database=test_log_connections", + ], + log_unlike=[r"connection ready"], + ) + + node.safe_psql( + "ALTER SYSTEM SET log_connections = " + "receipt,authorization,setup_durations;\n" + "SELECT pg_reload_conf();", + dbname="test_log_connections", + ) + node.connect_ok( + "test_log_connections", + "log_connections with subset of specified options logs only those aspects", + log_like=[ + r"connection received", + r"connection authorized: user=\S+ database=test_log_connections", + r"connection ready", + ], + log_unlike=[r"connection authenticated"], + ) + + node.safe_psql( + "ALTER SYSTEM SET log_connections = 'all'; SELECT pg_reload_conf();", + dbname="test_log_connections", + ) + node.connect_ok( + "test_log_connections", + "log_connections 'all' logs all available connection aspects", + log_like=[ + r"connection received", + r"connection authenticated", + r"connection authorized: user=\S+ database=test_log_connections", + r"connection ready", + ], + ) + + +def _create_password_roles(node, md5_works): + """Create roles with SCRAM/MD5 passwords and tables for SYSTEM_USER tests.""" + assert ( + node.psql_capture( + "SET password_encryption='scram-sha-256'; " + "CREATE ROLE scram_role LOGIN PASSWORD 'pass';" + ).rc + == 0 + ), "created user with SCRAM password" + expected_md5_rc = 0 if md5_works else 3 + assert ( + node.psql_capture( + "SET password_encryption='md5'; " + "CREATE ROLE 
md5_role LOGIN PASSWORD 'pass';" + ).rc + == expected_md5_rc + ), "created user with md5 password" + + node.safe_psql( + "CREATE TABLE sysuser_data (n) AS SELECT NULL FROM generate_series(1, 10);" + " GRANT ALL ON sysuser_data TO scram_role;" + ) + os.environ["PGPASSWORD"] = "pass" + + # A role that contains a comma to stress the parsing. + node.safe_psql( + "SET password_encryption='scram-sha-256'; " + "CREATE ROLE \"scram,role\" LOGIN PASSWORD 'pass';" + ) + + # A role with a non-default iteration count. + node.safe_psql( + "SET password_encryption='scram-sha-256';\n" + " SET scram_iterations=1024;\n" + " CREATE ROLE scram_role_iter LOGIN PASSWORD 'pass';\n" + " RESET scram_iterations;" + ) + res = node.safe_psql( + "SELECT substr(rolpassword,1,19) FROM pg_authid " + "WHERE rolname = 'scram_role_iter'" + ) + assert res == "SCRAM-SHA-256$1024:", "scram_iterations in server side ROLE" + + +def _test_password_command(node): + """Clientside \\password uses scram_iterations when computing SCRAM secrets. + + Mirrors the IO::Pty-gated block in the Perl original: an interactive psql + session sets scram_iterations and runs \\password, then the stored secret is + checked to confirm the client-side iteration count was used. + + pypg's BackgroundPsql drives psql over pipes, not a PTY. psql's \\password + prompts via simple_prompt_extended, which opens /dev/tty; when no + controlling terminal is available (the meson test harness and this sandbox) + it falls back to reading from stdin. The prompts carry no trailing newline, + so the line-buffered reader cannot match them with query_until the way + IPC::Run's byte-level pump does; the password lines are instead fed in + sequence (psql consumes them in order from stdin), reproducing the same + scenario and assertion faithfully. 
+ """ + session = node.background_psql("postgres") + try: + session.set_query_timer_restart() + session.query("SET password_encryption='scram-sha-256';") + session.query("SET scram_iterations=42;") + session.send("\\password scram_role_iter\npass\npass\n") + time.sleep(1.0) + finally: + session.quit() + + res = node.safe_psql( + "SELECT substr(rolpassword,1,17) FROM pg_authid " + "WHERE rolname = 'scram_role_iter'" + ) + assert res == "SCRAM-SHA-256$42:", "scram_iterations in psql \\password command" + + +def _test_trust(node, md5_works): + """For "trust" method, all users can connect; check SYSTEM_USER and require_auth.""" + _reset_pg_hba(node, "all", "all", "trust") + _test_conn( + node, + "user=scram_role", + "trust", + 0, + log_like=[r'connection authenticated: user="scram_role" method=trust'], + ) + if md5_works: + _test_conn( + node, + "user=md5_role", + "trust", + 0, + log_like=[r'connection authenticated: user="md5_role" method=trust'], + ) + + # SYSTEM_USER is null when not authenticated. + res = node.safe_psql("SELECT SYSTEM_USER IS NULL;") + assert res == "t", "users with trust authentication use SYSTEM_USER = NULL" + + # SYSTEM_USER with parallel workers when not authenticated. 
+ res = node.safe_psql( + "SET min_parallel_table_scan_size TO 0;\n" + "SET parallel_setup_cost TO 0;\n" + "SET parallel_tuple_cost TO 0;\n" + "SET max_parallel_workers_per_gather TO 2;\n" + "SELECT bool_and(SYSTEM_USER IS NOT DISTINCT FROM n) FROM sysuser_data;", + connstr="user=scram_role", + ) + assert ( + res == "t" + ), "users with trust authentication use SYSTEM_USER = NULL in parallel workers" + + _test_require_auth_trust(node) + + +def _test_require_auth_trust(node): + """require_auth interactions with trust authentication.""" + node.connect_ok("user=scram_role require_auth=", "empty require_auth succeeds") + + fail_methods = { + "gss": "gss", + "sspi": "sspi", + "password": "password", + "md5": "md5", + "scram-sha-256": "scram-sha-256", + "password,scram-sha-256": "password,scram-sha-256", + } + names = { + "gss": "GSS authentication required, fails with trust auth", + "sspi": "SSPI authentication required, fails with trust auth", + "password": "password authentication required, fails with trust auth", + "md5": "MD5 authentication required, fails with trust auth", + "scram-sha-256": "SCRAM authentication required, fails with trust auth", + "password,scram-sha-256": ( + "password and SCRAM authentication required, fails with trust auth" + ), + } + for method, value in fail_methods.items(): + node.connect_fails( + "user=scram_role require_auth={}".format(value), + names[method], + expected_stderr=r'authentication method requirement "{}" failed: ' + r"server did not complete authentication".format(re.escape(value)), + ) + + for value, name in [ + ("!gss", "GSS authentication can be forbidden, succeeds with trust auth"), + ("!sspi", "SSPI authentication can be forbidden, succeeds with trust auth"), + ( + "!password", + "password authentication can be forbidden, succeeds with trust auth", + ), + ("!md5", "md5 authentication can be forbidden, succeeds with trust auth"), + ( + "!scram-sha-256", + "SCRAM authentication can be forbidden, succeeds with trust auth", 
+ ), + ( + "!password,!scram-sha-256", + "multiple authentication types forbidden, succeeds with trust auth", + ), + ]: + node.connect_ok("user=scram_role require_auth={}".format(value), name) + + node.connect_ok( + "user=scram_role require_auth=none", + "all authentication types forbidden, succeeds with trust auth", + ) + node.connect_fails( + "user=scram_role require_auth=!none", + "any authentication types required, fails with trust auth", + expected_stderr=r"server did not complete authentication", + ) + + _test_require_auth_invalid(node) + + +def _test_require_auth_invalid(node): + """require_auth values that are syntactically invalid.""" + node.connect_fails( + "user=scram_role require_auth=scram-sha-256,!md5", + "negative require_auth methods cannot be mixed with positive ones", + expected_stderr=r'negative require_auth method "!md5" cannot be mixed ' + r"with non-negative methods", + ) + node.connect_fails( + "user=scram_role require_auth=!password,!none,scram-sha-256", + "positive require_auth methods cannot be mixed with negative one", + expected_stderr=r'require_auth method "scram-sha-256" cannot be mixed ' + r"with negative methods", + ) + dup_cases = [ + ( + "password,md5,password", + "require_auth methods cannot include duplicates, positive case", + r'require_auth method "password" is specified more than once', + ), + ( + "!password,!md5,!password", + "require_auth methods cannot be duplicated, negative case", + r'require_auth method "!password" is specified more than once', + ), + ( + "none,md5,none", + "require_auth methods cannot be duplicated, none case", + r'require_auth method "none" is specified more than once', + ), + ( + "!none,!md5,!none", + "require_auth methods cannot be duplicated, !none case", + r'require_auth method "!none" is specified more than once', + ), + ( + "scram-sha-256,scram-sha-256", + "require_auth methods cannot be duplicated, scram-sha-256 case", + r'require_auth method "scram-sha-256" is specified more than once', + ), + 
( + "!scram-sha-256,!scram-sha-256", + "require_auth methods cannot be duplicated, !scram-sha-256 case", + r'require_auth method "!scram-sha-256" is specified more than once', + ), + ] + for value, name, stderr in dup_cases: + node.connect_fails( + "user=scram_role require_auth={}".format(value), + name, + expected_stderr=stderr, + ) + node.connect_fails( + "user=scram_role require_auth=none,abcdefg", + "unknown require_auth methods are rejected", + expected_stderr=r'invalid require_auth value: "abcdefg"', + ) + + +def _test_password_method(node, md5_works): + """For plain "password" method, all users can connect; require_auth checks.""" + _reset_pg_hba(node, "all", "all", "password") + _test_conn( + node, + "user=scram_role", + "password", + 0, + log_like=[r'connection authenticated: identity="scram_role" method=password'], + ) + if md5_works: + _test_conn( + node, + "user=md5_role", + "password", + 0, + log_like=[r'connection authenticated: identity="md5_role" method=password'], + ) + + node.connect_ok( + "user=scram_role require_auth=password", + "password authentication required, works with password auth", + ) + node.connect_ok( + "user=scram_role require_auth=!none", + "any authentication required, works with password auth", + ) + node.connect_ok( + "user=scram_role require_auth=scram-sha-256,password,md5", + "multiple authentication types required, works with password auth", + ) + + node.connect_fails( + "user=scram_role require_auth=md5", + "md5 authentication required, fails with password auth", + expected_stderr=r'authentication method requirement "md5" failed: ' + r"server requested a cleartext password", + ) + node.connect_fails( + "user=scram_role require_auth=scram-sha-256", + "SCRAM authentication required, fails with password auth", + expected_stderr=r'authentication method requirement "scram-sha-256" ' + r"failed: server requested a cleartext password", + ) + node.connect_fails( + "user=scram_role require_auth=none", + "all authentication forbidden, 
fails with password auth", + expected_stderr=r'authentication method requirement "none" failed: ' + r"server requested a cleartext password", + ) + node.connect_fails( + "user=scram_role require_auth=!password", + "password authentication forbidden, fails with password auth", + expected_stderr=r"server requested a cleartext password", + ) + node.connect_fails( + "user=scram_role require_auth=!password,!md5,!scram-sha-256", + "multiple authentication types forbidden, fails with password auth", + expected_stderr=r' method requirement "!password,!md5,!scram-sha-256" ' + r"failed: server requested a cleartext password", + ) + + +def _test_scram_method(node): + """For "scram-sha-256" method: scram_role connects, md5_role fails.""" + _reset_pg_hba(node, "all", "all", "scram-sha-256") + _test_conn( + node, + "user=scram_role", + "scram-sha-256", + 0, + log_like=[ + r'connection authenticated: identity="scram_role" method=scram-sha-256' + ], + ) + _test_conn( + node, + "user=scram_role_iter", + "scram-sha-256", + 0, + log_like=[ + r'connection authenticated: identity="scram_role_iter" ' + r"method=scram-sha-256" + ], + ) + _test_conn( + node, + "user=md5_role", + "scram-sha-256", + 2, + log_unlike=[r"connection authenticated:"], + ) + + node.connect_ok( + "user=scram_role require_auth=scram-sha-256", + "SCRAM authentication required, works with SCRAM auth", + ) + node.connect_ok( + "user=scram_role require_auth=!none", + "any authentication required, works with SCRAM auth", + ) + node.connect_ok( + "user=scram_role require_auth=password,scram-sha-256,md5", + "multiple authentication types required, works with SCRAM auth", + ) + + node.connect_fails( + "user=scram_role require_auth=password", + "password authentication required, fails with SCRAM auth", + expected_stderr=r'authentication method requirement "password" failed: ' + r"server requested SASL authentication", + ) + node.connect_fails( + "user=scram_role require_auth=md5", + "md5 authentication required, fails with 
SCRAM auth", + expected_stderr=r'authentication method requirement "md5" failed: ' + r"server requested SASL authentication", + ) + node.connect_fails( + "user=scram_role require_auth=none", + "all authentication forbidden, fails with SCRAM auth", + expected_stderr=r'authentication method requirement "none" failed: ' + r"server requested SASL authentication", + ) + node.connect_fails( + "user=scram_role require_auth=!scram-sha-256", + "SCRAM authentication forbidden, fails with SCRAM auth", + expected_stderr=r"server requested SCRAM-SHA-256 authentication", + ) + node.connect_fails( + "user=scram_role require_auth=!password,!md5,!scram-sha-256", + "multiple authentication types forbidden, fails with SCRAM auth", + expected_stderr=r"server requested SCRAM-SHA-256 authentication", + ) + + # Bad passwords are rejected. + os.environ["PGPASSWORD"] = "badpass" + _test_conn( + node, + "user=scram_role", + "scram-sha-256", + 2, + log_unlike=[r"connection authenticated:"], + ) + os.environ["PGPASSWORD"] = "pass" + + +def _test_md5_method(node, md5_works): + """For "md5" method: all users connect (SCRAM used for SCRAM secrets).""" + _reset_pg_hba(node, "all", "all", "md5") + _test_conn( + node, + "user=scram_role", + "md5", + 0, + log_like=[r'connection authenticated: identity="scram_role" method=md5'], + ) + if md5_works: + _test_conn( + node, + "user=md5_role", + "md5", + 0, + expected_stderr=r"authenticated with an MD5-encrypted password", + log_like=[r'connection authenticated: identity="md5_role" method=md5'], + ) + + node.connect_ok( + "user=scram_role require_auth=scram-sha-256", + "SCRAM authentication required, works with SCRAM auth", + ) + node.connect_ok( + "user=scram_role require_auth=!none", + "any authentication required, works with SCRAM auth", + ) + node.connect_ok( + "user=scram_role require_auth=md5,scram-sha-256,password", + "multiple authentication types required, works with SCRAM auth", + ) + + node.connect_fails( + "user=scram_role 
require_auth=password", + "password authentication required, fails with SCRAM auth", + expected_stderr=r'authentication method requirement "password" failed: ' + r"server requested SASL authentication", + ) + node.connect_fails( + "user=scram_role require_auth=md5", + "MD5 authentication required, fails with SCRAM auth", + expected_stderr=r'authentication method requirement "md5" failed: ' + r"server requested SASL authentication", + ) + node.connect_fails( + "user=scram_role require_auth=none", + "all authentication types forbidden, fails with SCRAM auth", + expected_stderr=r'authentication method requirement "none" failed: ' + r"server requested SASL authentication", + ) + node.connect_fails( + "user=scram_role require_auth=!scram-sha-256", + "password authentication forbidden, fails with SCRAM auth", + expected_stderr=r'authentication method requirement "!scram-sha-256" ' + r"failed: server requested SCRAM-SHA-256 authentication", + ) + node.connect_fails( + "user=scram_role require_auth=!password,!md5,!scram-sha-256", + "multiple authentication types forbidden, fails with SCRAM auth", + expected_stderr=r"authentication method requirement " + r'"!password,!md5,!scram-sha-256" failed: server requested ' + r"SCRAM-SHA-256 authentication", + ) + + +def _test_password_expiration(node): + """Test password_expiration_warning_threshold behaviour.""" + node.connect_fails( + "user=expired dbname=postgres", + "connection fails due to expired password", + expected_stderr=r'password authentication failed for user "expired"', + ) + node.connect_ok( + "user=expiration_warnings dbname=postgres", + "connection succeeds with password expiration warning", + expected_stderr=r"role password will expire soon", + ) + node.connect_ok( + "user=no_warnings dbname=postgres", + "connection succeeds with no password expiration warning", + ) + + +def _test_system_user_parallel(node): + """SYSTEM_USER != NULL with parallel workers under md5.""" + node.safe_psql( + "TRUNCATE sysuser_data;\n" 
+ "INSERT INTO sysuser_data SELECT 'md5:scram_role' " + "FROM generate_series(1, 10);", + connstr="user=scram_role", + ) + res = node.safe_psql( + "SET min_parallel_table_scan_size TO 0;\n" + "SET parallel_setup_cost TO 0;\n" + "SET parallel_tuple_cost TO 0;\n" + "SET max_parallel_workers_per_gather TO 2;\n" + "SELECT bool_and(SYSTEM_USER IS NOT DISTINCT FROM n) FROM sysuser_data;", + connstr="user=scram_role", + ) + assert res == "t", ( + "users with md5 authentication use SYSTEM_USER = md5:role " + "in parallel workers" + ) + + +def _test_channel_binding(node): + """Channel binding without SSL can't work, for password and SCRAM methods.""" + _reset_pg_hba(node, "all", "all", "password") + os.environ["PGCHANNELBINDING"] = "require" + _test_conn(node, "user=scram_role", "scram-sha-256", 2) + _reset_pg_hba(node, "all", "all", "scram-sha-256") + os.environ["PGCHANNELBINDING"] = "require" + _test_conn(node, "user=scram_role", "scram-sha-256", 2) + + +def _test_pgpass(node, tmp_path): + """Test .pgpass processing using a temporary file.""" + pgpassfile = str(tmp_path / "pgpass") + _set_pgpassword(None) + os.environ.pop("PGCHANNELBINDING", None) + os.environ["PGPASSFILE"] = pgpassfile + + _unlink(pgpassfile) + long_comment = ( + "This very long comment is just here to exercise handling " + "of long lines in the file. 
" + ) + with open(pgpassfile, "w", encoding="utf-8") as fh: + fh.write( + "\n# {}\n*:*:postgres:scram_role:pass:this is not part of the " + "password.\n".format(long_comment * 5) + ) + os.chmod(pgpassfile, 0o600) + + _reset_pg_hba(node, "all", "all", "password") + _test_conn(node, "user=scram_role", "password from pgpass", 0) + _test_conn(node, "user=md5_role", "password from pgpass", 2) + + with open(pgpassfile, "a", encoding="utf-8") as fh: + fh.write("\n*:*:*:scram_role:p\\ass\n*:*:*:scram,role:p\\ass\n") + + _test_conn(node, "user=scram_role", "password from pgpass", 0) + + _test_regex_hba(node) + + _unlink(pgpassfile) + os.environ.pop("PGPASSFILE", None) + + +def _test_regex_hba(node): + """Regular-expression matching for user/database names in pg_hba.conf.""" + # User regexp; the third regexp matches. + _reset_pg_hba(node, "all", "/^.*nomatch.*$, baduser, /^scr.*$", "password") + _test_conn( + node, + "user=scram_role", + "password, matching regexp for username", + 0, + log_like=[r'connection authenticated: identity="scram_role" method=password'], + ) + # The third regexp no longer matches. + _reset_pg_hba(node, "all", "/^.*nomatch.*$, baduser, /^sc_r.*$", "password") + _test_conn( + node, + "user=scram_role", + "password, non matching regexp for username", + 2, + log_unlike=[r"connection authenticated:"], + ) + # A comma in the regular expression; double quotes are mandatory. + _reset_pg_hba(node, "all", '"/^.*m,.*e$"', "password") + _test_conn( + node, + "user=scram,role", + "password, matching regexp for username", + 0, + log_like=[r'connection authenticated: identity="scram,role" method=password'], + ) + # dbname regexp; the third regexp matches. 
+ _reset_pg_hba(node, "/^.*nomatch.*$, baddb, /^regex_t.*b$", "all", "password") + _test_conn( + node, + "user=scram_role dbname=regex_testdb", + "password, matching regexp for dbname", + 0, + log_like=[r'connection authenticated: identity="scram_role" method=password'], + ) + # The third regexp no longer matches. + _reset_pg_hba(node, "/^.*nomatch.*$, baddb, /^regex_t.*ba$", "all", "password") + _test_conn( + node, + "user=scram_role dbname=regex_testdb", + "password, non matching regexp for dbname", + 2, + log_unlike=[r"connection authenticated:"], + ) + + +def _test_role_membership(node): + """Authentication tests with specific HBA policies on roles.""" + _reset_pg_hba(node, "all", "all", "trust") + node.safe_psql("CREATE DATABASE regress_regression_group;") + node.safe_psql( + "CREATE ROLE regress_regression_group LOGIN PASSWORD 'pass';\n" + "CREATE ROLE regress_member LOGIN SUPERUSER IN ROLE " + "regress_regression_group PASSWORD 'pass';\n" + "CREATE ROLE regress_not_member LOGIN SUPERUSER PASSWORD 'pass';" + ) + os.environ["PGPASSWORD"] = "pass" + + auth_re = r'connection authenticated: identity="{}" method=scram-sha-256' + + # Exact matching, no members allowed. + _reset_pg_hba(node, "all", "regress_regression_group", "scram-sha-256") + _membership_triple(node, auth_re, member_ok=False, not_member_ok=False) + + # '+' membership: all members are allowed. + _reset_pg_hba(node, "all", "+regress_regression_group", "scram-sha-256") + _membership_triple(node, auth_re, member_ok=True, not_member_ok=False) + + # samerole respects membership. The Perl test sets PGDATABASE to select the + # connection database; pypg's connection environment always pins + # PGDATABASE=postgres, so the database is carried in the connstr instead + # (semantically identical for these connection-outcome checks). 
+ _reset_pg_hba(node, "samerole", "all", "scram-sha-256") + _membership_triple( + node, + auth_re, + member_ok=True, + not_member_ok=False, + dbname="regress_regression_group", + ) + + # samegroup respects membership. + _reset_pg_hba(node, "samegroup", "all", "scram-sha-256") + _membership_triple( + node, + auth_re, + member_ok=True, + not_member_ok=False, + dbname="regress_regression_group", + ) + + +def _membership_triple(node, auth_re, *, member_ok, not_member_ok, dbname=None): + """Run the group/member/not-member connection triple for a membership policy. + + dbname, when given, is appended to each connstr so the connection targets a + specific database (used by the samerole/samegroup policies). + """ + suffix = " dbname={}".format(dbname) if dbname else "" + _test_conn( + node, + "user=regress_regression_group" + suffix, + "scram-sha-256", + 0, + log_like=[auth_re.format("regress_regression_group")], + ) + if member_ok: + _test_conn( + node, + "user=regress_member" + suffix, + "scram-sha-256", + 0, + log_like=[auth_re.format("regress_member")], + ) + else: + _test_conn( + node, + "user=regress_member" + suffix, + "scram-sha-256", + 2, + log_unlike=[auth_re.format("regress_member")], + ) + if not_member_ok: + _test_conn( + node, + "user=regress_not_member" + suffix, + "scram-sha-256", + 0, + log_like=[auth_re.format("regress_not_member")], + ) + else: + _test_conn( + node, + "user=regress_not_member" + suffix, + "scram-sha-256", + 2, + log_unlike=[auth_re.format("regress_not_member")], + ) + + +def _unlink(path): + try: + os.unlink(path) + except FileNotFoundError: + pass + + +@pytest.mark.skipif(sys.platform == "win32", reason="needs Unix-domain sockets") +def test_001_password(create_pg, tmp_path): + """Password authentication, require_auth, log_connections, pgpass and HBA.""" + node = create_pg("primary", start=False) + node.append_conf("log_connections = on\n") + # Needed to allow connect_fails to inspect the postmaster log. 
+ node.append_conf("log_min_messages = debug2") + node.append_conf("password_expiration_warning_threshold = '1100d'") + node.start() + + _setup_roles(node) + _test_log_connections(node) + + # md5 could fail in FIPS mode. + md5_works = node.psql_capture("select md5('')").rc == 0 + + _create_password_roles(node, md5_works) + _test_password_command(node) + + # Database used by the regular-expression dbname tests. + node.safe_psql("CREATE database regex_testdb;") + + _test_trust(node, md5_works) + _test_password_method(node, md5_works) + _test_scram_method(node) + _test_md5_method(node, md5_works) + _test_password_expiration(node) + _test_system_user_parallel(node) + _test_channel_binding(node) + _test_pgpass(node, tmp_path) + _test_role_membership(node) diff --git a/src/test/authentication/pyt/test_002_saslprep.py b/src/test/authentication/pyt/test_002_saslprep.py new file mode 100644 index 0000000000000..c3b6607c7a942 --- /dev/null +++ b/src/test/authentication/pyt/test_002_saslprep.py @@ -0,0 +1,72 @@ +# Copyright (c) 2021-2026, PostgreSQL Global Development Group + +"""Port of src/test/authentication/t/002_saslprep.pl. + +SCRAM authentication applies SASLprep normalization to passwords: equivalent +Unicode forms of a stored password authenticate successfully, while +non-equivalent ones fail. Roles are created with various passwords and logged in +with byte sequences that should normalize to the stored value (success) or not +(failure). 
+""" + +import os +import sys + +import pytest + + +def _reset_pg_hba(node, method): + (node.datadir / "pg_hba.conf").unlink(missing_ok=True) + node.append_conf("local all all {}".format(method), filename="pg_hba.conf") + node.reload() + + +def _test_login(node, role, password, expected_ok): + connstr = "user={}".format(role) + status = "success" if expected_ok else "failed" + name = "authentication {} for role {} with password {!r}".format( + status, role, password + ) + os.environ["PGPASSWORD"] = password + if expected_ok: + node.connect_ok(connstr, name) + else: + node.connect_fails(connstr, name) + + +@pytest.mark.skipif(sys.platform == "win32", reason="needs Unix-domain sockets") +def test_002_saslprep(create_pg): + """SASLprep-equivalent passwords authenticate; non-equivalent ones fail.""" + node = create_pg("primary", extra=["--locale=C", "--encoding=UTF8"], start=False) + node.start() + node.safe_psql( + "SET password_encryption='scram-sha-256';\nSET client_encoding='utf8';\n" + "CREATE ROLE saslpreptest1_role LOGIN PASSWORD 'IX';\n" + "CREATE ROLE saslpreptest4a_role LOGIN PASSWORD 'a';\n" + "CREATE ROLE saslpreptest4b_role LOGIN PASSWORD E'\\xc2\\xaa';\n" + "CREATE ROLE saslpreptest6_role LOGIN PASSWORD E'foo\\x07bar';\n" + "CREATE ROLE saslpreptest7_role LOGIN PASSWORD E'foo\\u0627\\u0031bar';" + ) + _reset_pg_hba(node, "scram-sha-256") + # Passwords are raw byte strings decoded as latin-1 so each \xNN is one byte + # on the wire (libpq reads PGPASSWORD as bytes), matching the Perl literals. 
+ cases = [ + ("saslpreptest1_role", b"I\xc2\xadX", True), + ("saslpreptest1_role", b"\xe2\x85\xa8", True), + ("saslpreptest1_role", b"ix", False), + ("saslpreptest4a_role", b"a", True), + ("saslpreptest4a_role", b"\xc2\xaa", True), + ("saslpreptest4b_role", b"a", True), + ("saslpreptest4b_role", b"\xc2\xaa", True), + ("saslpreptest6_role", b"foo\x07bar", True), + ("saslpreptest6_role", b"foobar", False), + ("saslpreptest7_role", b"foo\xd8\xa71bar", True), + ("saslpreptest7_role", b"foo1\xd8\xa7bar", False), + ("saslpreptest7_role", b"foobar", False), + ] + for role, password, expected_ok in cases: + # Decode with surrogateescape so os.fsencode round-trips the exact bytes + # into the subprocess environment (latin-1 would be re-encoded as UTF-8). + _test_login( + node, role, password.decode("utf-8", "surrogateescape"), expected_ok + ) diff --git a/src/test/authentication/pyt/test_003_peer.py b/src/test/authentication/pyt/test_003_peer.py new file mode 100644 index 0000000000000..232b4bbef98a1 --- /dev/null +++ b/src/test/authentication/pyt/test_003_peer.py @@ -0,0 +1,361 @@ +# Copyright (c) 2021-2026, PostgreSQL Global Development Group + +"""Port of src/test/authentication/t/003_peer.pl. + +Tests peer authentication and the user name map. Peer auth maps the connecting +OS user to a database role, optionally through pg_ident.conf maps that may use +exact names, the "all" keyword, regular expressions (with \\1 subexpression +replacement), and group membership ("+role"). The test is skipped if the +platform does not support peer authentication, and requires Unix-domain sockets. + +The OS/system user is discovered from the server itself via SYSTEM_USER (which +reads "peer:username" under peer auth), matching the Perl original rather than +guessing from the process environment. 
+""" + +import re +import sys + +import pytest + + +def _reset_pg_hba(node, hba_method): + """Delete pg_hba.conf, write a single 'local all all ' line, reload.""" + (node.datadir / "pg_hba.conf").unlink(missing_ok=True) + node.append_conf("local all all {}".format(hba_method), filename="pg_hba.conf") + node.reload() + + +def _reset_pg_ident(node, map_name, system_user, pg_user): + """Delete pg_ident.conf, write a single map line, reload.""" + (node.datadir / "pg_ident.conf").unlink(missing_ok=True) + node.append_conf( + "{} {} {}".format(map_name, system_user, pg_user), filename="pg_ident.conf" + ) + node.reload() + + +def _test_role(node, role, method, expected_res, test_details, *, log_like=None): + """Connect as role and assert success (expected_res 0) or failure (else). + + For a failure only the status code is checked (no error-message match), as in + the Perl test_role helper. log_like patterns are asserted against the server + log emitted during the attempt. + """ + connstr = "user={}".format(role) + status_string = "success" if expected_res == 0 else "failed" + testname = "authentication {} for method {}, role {} {}".format( + status_string, method, role, test_details + ) + if expected_res == 0: + node.connect_ok(connstr, testname, log_like=log_like) + else: + node.connect_fails(connstr, testname, log_like=log_like) + + +def _create_roles(node): + """Create the roles and group used by the user name map tests.""" + node.safe_psql("CREATE ROLE testmapuser LOGIN") + node.safe_psql("CREATE ROLE testmapgroup NOLOGIN") + node.safe_psql("GRANT testmapgroup TO testmapuser") + # Note the double backslash in the role name. 
+ node.safe_psql(r'CREATE ROLE "testmapgroupliteral\1" LOGIN') + node.safe_psql(r'GRANT "testmapgroupliteral\1" TO testmapuser') + + +def _run_map_tests(node, system_user): + """Run the user-name-map matching scenarios for the given system user.""" + auth_ok = [ + re.compile( + r'connection authenticated: identity="{}" method=peer'.format( + re.escape(system_user) + ) + ) + ] + + # With a user name map. + _reset_pg_ident(node, "mypeermap", system_user, "testmapuser") + _reset_pg_hba(node, "peer map=mypeermap") + _test_role(node, "testmapuser", "peer", 0, "with user name map", log_like=auth_ok) + + # With the "all" keyword. + _reset_pg_ident(node, "mypeermap", system_user, "all") + _test_role( + node, + "testmapuser", + "peer", + 0, + 'with keyword "all" as database user in user name map', + log_like=auth_ok, + ) + + # With the "all" keyword, but quoted (no effect here). + _reset_pg_ident(node, "mypeermap", system_user, '"all"') + _test_role( + node, + "testmapuser", + "peer", + 2, + 'with quoted keyword "all" as database user in user name map', + log_like=[r'no match in usermap "mypeermap" for user "testmapuser"'], + ) + + # Regexp of the database user matches. + _reset_pg_ident(node, "mypeermap", system_user, r"/^testm.*$") + _test_role( + node, + "testmapuser", + "peer", + 0, + "with regexp of database user in user name map", + log_like=auth_ok, + ) + + # Regexp of the database user does not match. 
+ _reset_pg_ident(node, "mypeermap", system_user, r"/^doesnotmatch.*$") + _test_role( + node, + "testmapuser", + "peer", + 2, + "with bad regexp of database user in user name map", + log_like=[r'no match in usermap "mypeermap" for user "testmapuser"'], + ) + + _run_system_user_regex_tests(node, system_user, auth_ok) + _run_backref_tests(node, system_user, auth_ok) + _run_group_tests(node, system_user, auth_ok) + + +def _run_system_user_regex_tests(node, system_user, auth_ok): + """Map tests that use a regular expression for the system user.""" + # Last 3 chars of the system user (or the whole name if <= 3 chars). + regex_test_string = system_user[-3:] + + # System user regular expression matches. + _reset_pg_ident( + node, "mypeermap", r"/^.*{}$".format(regex_test_string), "testmapuser" + ) + _test_role( + node, + "testmapuser", + "peer", + 0, + "with regexp of system user in user name map", + log_like=auth_ok, + ) + + # Both regular expressions match. + _reset_pg_ident( + node, "mypeermap", r"/^.*{}$".format(regex_test_string), r"/^testm.*$" + ) + _test_role( + node, + "testmapuser", + "peer", + 0, + "with regexps for both system and database user in user name map", + log_like=auth_ok, + ) + + # Regexp matches and database role is the "all" keyword. + _reset_pg_ident(node, "mypeermap", r"/^.*{}$".format(regex_test_string), "all") + _test_role( + node, + "testmapuser", + "peer", + 0, + 'with regexp of system user and keyword "all" in user name map', + log_like=auth_ok, + ) + + +def _run_backref_tests(node, system_user, auth_ok): + """Map tests exercising \\1 subexpression replacement and its errors.""" + regex_test_string = system_user[-3:] + mapped_name = "test{0}map{0}user".format(regex_test_string) + node.safe_psql("CREATE ROLE {} LOGIN".format(mapped_name)) + + # Regexp matches and \1 is replaced in the subexpression. 
+ _reset_pg_ident( + node, "mypeermap", r"/^.*({})$".format(regex_test_string), r"test\1map\1user" + ) + _test_role( + node, + mapped_name, + "peer", + 0, + r"with regular expression in user name map with \1 replaced", + log_like=auth_ok, + ) + + # Regexp matches and \1 is replaced, even if quoted. + _reset_pg_ident( + node, + "mypeermap", + r"/^.*({})$".format(regex_test_string), + r'"test\1map\1user"', + ) + _test_role( + node, + mapped_name, + "peer", + 0, + r"with regular expression in user name map with quoted \1 replaced", + log_like=auth_ok, + ) + + # The regexp has no subexpression, but the database user contains \1. + _reset_pg_ident(node, "mypeermap", r"/^{}$".format(system_user), r"\1testmapuser") + _test_role( + node, + "testmapuser", + "peer", + 2, + r"with regular expression in user name map with \1 not replaced", + log_like=[ + r'regular expression "\^{}\$" has no subexpressions as requested ' + r'by backreference in "\\1testmapuser"'.format(re.escape(system_user)) + ], + ) + + # Regexp of the system user does not match (doubled system user). + bad_regex_test_string = system_user + system_user + _reset_pg_ident( + node, "mypeermap", r"/^.*{}$".format(bad_regex_test_string), "testmapuser" + ) + _test_role( + node, + "testmapuser", + "peer", + 2, + "with regexp of system user in user name map", + log_like=[r'no match in usermap "mypeermap" for user "testmapuser"'], + ) + + +def _run_group_tests(node, system_user, auth_ok): + """Map tests exercising group ("+role") membership matching.""" + regex_test_string = system_user[-3:] + + # Group role match for the database user. + _reset_pg_ident(node, "mypeermap", system_user, "+testmapgroup") + _test_role( + node, "testmapuser", "peer", 0, "plain user with group", log_like=auth_ok + ) + _test_role( + node, + "testmapgroup", + "peer", + 2, + "group user with group", + log_like=[r'role "testmapgroup" is not permitted to log in'], + ) + + # Quotes on the group match nullify its effect. 
+ _reset_pg_ident(node, "mypeermap", system_user, '"+testmapgroup"') + _test_role( + node, + "testmapuser", + "peer", + 2, + "plain user with quoted group name", + log_like=[r'no match in usermap "mypeermap" for user "testmapuser"'], + ) + + # Regexp for the system user, with a group membership check. + _reset_pg_ident( + node, "mypeermap", r"/^.*{}$".format(regex_test_string), "+testmapgroup" + ) + _test_role( + node, + "testmapuser", + "peer", + 0, + "regexp of system user as group member", + log_like=auth_ok, + ) + _test_role( + node, + "testmapgroup", + "peer", + 2, + "regexp of system user as non-member of group", + log_like=[r'role "testmapgroup" is not permitted to log in'], + ) + + # Membership checks and regexes use literal \1 instead of replacing it. + _reset_pg_ident( + node, + "mypeermap", + r"/^.*{}(.*)$".format(regex_test_string), + r"+testmapgroupliteral\1", + ) + _test_role( + node, + "testmapuser", + "peer", + 0, + r"membership check with literal \1", + log_like=auth_ok, + ) + + # Same with a quoted regular expression for the database user; no \1 repl. + _reset_pg_ident( + node, + "mypeermap", + r"/^.*{}(.*)$".format(regex_test_string), + r'"/^testmapgroupliteral\\1$"', + ) + _test_role( + node, + r"testmapgroupliteral\\1", + "peer", + 0, + r"regexp of database user with literal \1", + log_like=auth_ok, + ) + + +@pytest.mark.skipif(sys.platform == "win32", reason="needs Unix-domain sockets") +def test_003_peer(create_pg): + """Peer authentication and user name map matching scenarios.""" + node = create_pg("node", start=False) + node.append_conf("log_connections = authentication\n") + # Needed to allow connect_fails to inspect the postmaster log. + node.append_conf("log_min_messages = debug2") + node.start() + + # Set pg_hba.conf with the peer authentication. + _reset_pg_hba(node, "peer") + + # Check if peer authentication is supported on this platform. 
+ log_offset = node.current_log_position() + node.psql_capture("", on_error_stop=False) + if node.log_matches( + r"peer authentication is not supported on this platform", log_offset + ): + pytest.skip("peer authentication is not supported on this platform") + + _create_roles(node) + + # Extract the system user for the user name map. + system_user = node.safe_psql("select (string_to_array(SYSTEM_USER, ':'))[2]") + + # While on it, check the status of huge pages: either on or off, never + # unknown. + huge_pages_status = node.safe_psql("SHOW huge_pages_status;") + assert huge_pages_status != "unknown", "check huge_pages_status" + + # Without the user name map: failure as the database role does not map to an + # authorized system user. + _test_role( + node, + "testmapuser", + "peer", + 2, + "without user name map", + log_like=[r'Peer authentication failed for user "testmapuser"'], + ) + + _run_map_tests(node, system_user) diff --git a/src/test/authentication/pyt/test_004_file_inclusion.py b/src/test/authentication/pyt/test_004_file_inclusion.py new file mode 100644 index 0000000000000..b5325b54a48ca --- /dev/null +++ b/src/test/authentication/pyt/test_004_file_inclusion.py @@ -0,0 +1,295 @@ +# Copyright (c) 2021-2026, PostgreSQL Global Development Group + +"""Port of src/test/authentication/t/004_file_inclusion.pl. + +Tests include/include_if_exists/include_dir directives in HBA and ident files. +The HBA and ident entry points are relocated (via ALTER SYSTEM) into +subdirectories, then a structure of files referencing each other through +include directives is generated. After a restart the test compares the contents +of pg_hba_file_rules and pg_ident_file_mappings against the rule/map text built +up alongside the files, verifying line numbers, rule numbers, relative-path +resolution, include_if_exists for missing/present files, include_dir ordering, +and the @file database-name expansion. Requires Unix-domain sockets. 
+""" + +import os +import sys + +import pytest + + +class _Counters: + """Tracks per-file line numbers and the global hba/ident rule counters. + + Mirrors the %line_counters hash of the Perl original: line_counters[file] + is the next line number written to that file, while hba_rule/ident_rule are + the global pg_hba_file_rules.rule_number / pg_ident_file_mappings.map_number + counters that advance only for non-include entries. + """ + + def __init__(self): + self.hba_rule = 0 + self.ident_rule = 0 + self.files = {} + + def next_fileline(self, filename): + self.files[filename] = self.files.get(filename, 0) + 1 + return self.files[filename] + + def next_hba_rule(self): + self.hba_rule += 1 + return self.hba_rule + + def next_ident_rule(self): + self.ident_rule += 1 + return self.ident_rule + + +def _append_conf(node, filename, entry): + """Append entry (plus newline) to filename, relative to the data dir. + + Creates parent directories as needed, mirroring the way the Perl test relies + on directories it has already mkdir'd. + """ + path = node.datadir / filename + path.parent.mkdir(parents=True, exist_ok=True) + with open(path, "a", encoding="utf-8") as fh: + fh.write(entry + "\n") + + +def _basename(filename): + return os.path.basename(filename) + + +def _add_hba_line(node, counters, filename, entry): + """Append entry to an HBA file and return its expected pg_hba_file_rules row. + + Maintains the file-line and global rule counters. An "include" directive + generates no catalog row (and does not advance the rule counter), so it + returns the empty string. 
+ """ + _append_conf(node, filename, entry) + base_filename = _basename(filename) + fileline = counters.next_fileline(filename) + + if entry.startswith("include"): + return "" + + globline = counters.next_hba_rule() + tokens = entry.split(" ") + tokens[1] = "{" + tokens[1] + "}" # database + tokens[2] = "{" + tokens[2] + "}" # user_name + tokens.append("") # options + tokens.append("") # error + + line = "" + if globline > 1: + line += "\n" + line += "{}|{}|{}|".format(globline, base_filename, fileline) + line += "|".join(tokens) + return line + + +def _add_ident_line(node, counters, filename, entry): + """Append entry to an ident file and return its pg_ident_file_mappings row. + + Like _add_hba_line, but for the ident map catalog; include directives + generate no row. + """ + base_filename = _basename(filename) + _append_conf(node, filename, entry) + fileline = counters.next_fileline(filename) + + if entry.startswith("include"): + return "" + + globline = counters.next_ident_rule() + tokens = entry.split(" ") + tokens.append("") # error + + line = "" + if globline > 1: + line += "\n" + line += "{}|{}|{}|".format(globline, base_filename, fileline) + line += "|".join(tokens) + return line + + +def _build_hba_structure(node, counters, hba_file): + """Generate the HBA file structure with include directives. + + Returns the expected concatenated pg_hba_file_rules contents. + """ + expected = "" + + for sub in ("subdir1", "hba_inc", "hba_inc_if", "hba_pos"): + (node.datadir / sub).mkdir(parents=True, exist_ok=True) + + # First, make sure that we will always be able to connect. + expected += _add_hba_line(node, counters, hba_file, "local all all trust") + + # "include". As hba_file lives in subdir1, pg_hba_pre.conf is at the root of + # the data directory. 
+ expected += _add_hba_line(node, counters, hba_file, "include ../pg_hba_pre.conf") + expected += _add_hba_line(node, counters, "pg_hba_pre.conf", "local pre all reject") + expected += _add_hba_line(node, counters, hba_file, "local all all reject") + _add_hba_line(node, counters, hba_file, "include ../hba_pos/pg_hba_pos.conf") + expected += _add_hba_line( + node, counters, "hba_pos/pg_hba_pos.conf", "local pos all reject" + ) + # A relative include path is resolved from the base location of the file it + # is loaded from. + expected += _add_hba_line( + node, counters, "hba_pos/pg_hba_pos.conf", "include pg_hba_pos2.conf" + ) + expected += _add_hba_line( + node, counters, "hba_pos/pg_hba_pos2.conf", "local pos2 all reject" + ) + expected += _add_hba_line( + node, counters, "hba_pos/pg_hba_pos2.conf", "local pos3 all reject" + ) + + # include_if_exists: missing file, no catalog entries. + expected += _add_hba_line( + node, counters, hba_file, "include_if_exists ../hba_inc_if/none" + ) + # File with some contents loaded. + expected += _add_hba_line( + node, counters, hba_file, "include_if_exists ../hba_inc_if/some" + ) + expected += _add_hba_line( + node, counters, "hba_inc_if/some", "local if_some all reject" + ) + + # include_dir + expected += _add_hba_line(node, counters, hba_file, "include_dir ../hba_inc") + expected += _add_hba_line( + node, counters, "hba_inc/01_z.conf", "local dir_z all reject" + ) + expected += _add_hba_line( + node, counters, "hba_inc/02_a.conf", "local dir_a all reject" + ) + # Garbage file not suffixed by .conf, so it is ignored. + _append_conf(node, "hba_inc/garbageconf", "should not be included") + + # Authentication file expanded in an existing entry for database names. + # As it is expanded, ignore the output generated. 
+ _add_hba_line(node, counters, hba_file, "local @../dbnames.conf all reject") + _append_conf(node, "dbnames.conf", "db1") + _append_conf(node, "dbnames.conf", "db3") + expected += ( + "\n" + + str(counters.hba_rule) + + "|" + + _basename(hba_file) + + "|" + + str(counters.files[hba_file]) + + "|local|{db1,db3}|{all}|reject||" + ) + return expected + + +def _build_ident_structure(node, counters, ident_file): + """Generate the ident file structure with include directives. + + Returns the expected concatenated pg_ident_file_mappings contents. + """ + expected = "" + + for sub in ("subdir2", "ident_inc", "ident_inc_if", "ident_pos"): + (node.datadir / sub).mkdir(parents=True, exist_ok=True) + + # include. pg_ident_pre.conf is at the root of the data directory. + expected += _add_ident_line( + node, counters, ident_file, "include ../pg_ident_pre.conf" + ) + expected += _add_ident_line(node, counters, "pg_ident_pre.conf", "pre foo bar") + expected += _add_ident_line(node, counters, ident_file, "test a b") + expected += _add_ident_line( + node, counters, ident_file, "include ../ident_pos/pg_ident_pos.conf" + ) + expected += _add_ident_line( + node, counters, "ident_pos/pg_ident_pos.conf", "pos foo bar" + ) + # A relative include path is resolved from the base location of the file it + # is loaded from. + expected += _add_ident_line( + node, counters, "ident_pos/pg_ident_pos.conf", "include pg_ident_pos2.conf" + ) + expected += _add_ident_line( + node, counters, "ident_pos/pg_ident_pos2.conf", "pos2 foo bar" + ) + expected += _add_ident_line( + node, counters, "ident_pos/pg_ident_pos2.conf", "pos3 foo bar" + ) + + # include_if_exists: missing file, no catalog entries. + expected += _add_ident_line( + node, counters, ident_file, "include_if_exists ../ident_inc_if/none" + ) + # File with some contents loaded. 
+ expected += _add_ident_line( + node, counters, ident_file, "include_if_exists ../ident_inc_if/some" + ) + expected += _add_ident_line(node, counters, "ident_inc_if/some", "if_some foo bar") + + # include_dir + expected += _add_ident_line(node, counters, ident_file, "include_dir ../ident_inc") + expected += _add_ident_line(node, counters, "ident_inc/01_z.conf", "dir_z foo bar") + expected += _add_ident_line(node, counters, "ident_inc/02_a.conf", "dir_a foo bar") + # Garbage file not suffixed by .conf, so it is ignored. + _append_conf(node, "ident_inc/garbageconf", "should not be included") + + return expected + + +_HBA_QUERY = """SELECT rule_number, + regexp_replace(file_name, '.*/', ''), + line_number, + type, + database, + user_name, + auth_method, + options, + error + FROM pg_hba_file_rules ORDER BY rule_number;""" + +_IDENT_QUERY = """SELECT map_number, + regexp_replace(file_name, '.*/', ''), + line_number, + map_name, + sys_name, + pg_username, + error + FROM pg_ident_file_mappings ORDER BY map_number""" + + +@pytest.mark.skipif(sys.platform == "win32", reason="needs Unix-domain sockets") +def test_004_file_inclusion(create_pg): + """HBA/ident include directives reflect correctly in the catalog views.""" + # Locations for the entry points of the HBA and ident files. + hba_file = "subdir1/pg_hba_custom.conf" + ident_file = "subdir2/pg_ident_custom.conf" + + node = create_pg("primary") + data_dir = node.datadir + counters = _Counters() + + # Customise main auth file names. + node.safe_psql("ALTER SYSTEM SET hba_file = '{}/{}'".format(data_dir, hba_file)) + node.safe_psql("ALTER SYSTEM SET ident_file = '{}/{}'".format(data_dir, ident_file)) + + # Remove the original ones; this node links to non-default ones now. 
+ (data_dir / "pg_hba.conf").unlink(missing_ok=True) + (data_dir / "pg_ident.conf").unlink(missing_ok=True) + + hba_expected = _build_hba_structure(node, counters, hba_file) + ident_expected = _build_ident_structure(node, counters, ident_file) + + node.restart() + + contents = node.safe_psql(_HBA_QUERY) + assert contents == hba_expected, "check contents of pg_hba_file_rules" + + contents = node.safe_psql(_IDENT_QUERY) + assert contents == ident_expected, "check contents of pg_ident_file_mappings" diff --git a/src/test/authentication/pyt/test_005_sspi.py b/src/test/authentication/pyt/test_005_sspi.py new file mode 100644 index 0000000000000..d1f7f2f349130 --- /dev/null +++ b/src/test/authentication/pyt/test_005_sspi.py @@ -0,0 +1,41 @@ +# Copyright (c) 2024-2026, PostgreSQL Global Development Group + +# pylint: disable=line-too-long +"""Port of src/test/authentication/t/005_sspi.pl. + +Windows SSPI authentication: require_auth=sspi connects, while require_auth=!sspi +and require_auth=scram-sha-256 are rejected because the server requests SSPI. +Requires Windows without PG_TEST_USE_UNIX_SOCKETS; always skips elsewhere. 
+""" + +import os +import platform + +import pytest + + +def test_005_sspi(create_pg): + """SSPI auth: required works; forbidden and SCRAM-required fail.""" + windows_os = platform.system() == "Windows" + use_unix_sockets = bool(os.environ.get("PG_TEST_USE_UNIX_SOCKETS")) + if not windows_os or use_unix_sockets: + pytest.skip("SSPI tests require Windows (without PG_TEST_USE_UNIX_SOCKETS)") + node = create_pg("primary", start=False) + node.append_conf("log_connections = authentication\n") + node.start() + huge_pages_status = node.safe_psql("SHOW huge_pages_status;") + assert huge_pages_status != "unknown", "check huge_pages_status" + node.connect_ok( + "require_auth=sspi", + "SSPI authentication required, works with SSPI auth", + ) + node.connect_fails( + "require_auth=!sspi", + "SSPI authentication forbidden, fails with SSPI auth", + expected_stderr=r'authentication method requirement "!sspi" failed: server requested SSPI authentication', + ) + node.connect_fails( + "require_auth=scram-sha-256", + "SCRAM authentication required, fails with SSPI auth", + expected_stderr=r'authentication method requirement "scram-sha-256" failed: server requested SSPI authentication', + ) diff --git a/src/test/authentication/pyt/test_006_login_trigger.py b/src/test/authentication/pyt/test_006_login_trigger.py new file mode 100644 index 0000000000000..2c25e91f0af32 --- /dev/null +++ b/src/test/authentication/pyt/test_006_login_trigger.py @@ -0,0 +1,150 @@ +# Copyright (c) 2023-2026, PostgreSQL Global Development Group + +"""Port of src/test/authentication/t/006_login_trigger.pl. + +A login event trigger fires on every connection: it records the SESSION_USER in +a table and raises a NOTICE ("You are welcome") for allowed users, or an +EXCEPTION for a disallowed one. The test exercises creating/enabling the trigger, +connecting as another role, verifying the recorded logins, and cleanup, checking +each command's exit code, stdout, and stderr. 
+""" + +import re +import sys + +import pytest + + +def _psql_command( + node, + sql, + expected_ret, + test_name, + *, + connstr=None, + log_like=None, + log_unlike=None, + log_exact=None, + err_like=None, + err_unlike=None, + err_exact=None, +): + res = node.psql_capture(sql, connstr=connstr, on_error_stop=False) + assert res.rc == expected_ret, "{}: exit code {}".format(test_name, expected_ret) + out, err = res.stdout, res.stderr + for rx in log_like or []: + assert re.search(rx, out), "{}: log matches".format(test_name) + for rx in log_unlike or []: + assert not re.search(rx, out), "{}: log unmatches".format(test_name) + if log_exact is not None: + assert out == log_exact, "{}: log equals".format(test_name) + for rx in err_like or []: + assert re.search(rx, err), "{}: err matches".format(test_name) + for rx in err_unlike or []: + assert not re.search(rx, err), "{}: err unmatches".format(test_name) + if err_exact is not None: + assert err == err_exact, "{}: err equals".format(test_name) + + +@pytest.mark.skipif(sys.platform == "win32", reason="needs Unix-domain sockets") +def test_006_login_trigger(create_pg): + """A login event trigger records logins and gates them by SESSION_USER.""" + node = create_pg("main", extra=["--locale=C", "--encoding=UTF8"], start=False) + node.append_conf( + "\nwal_level = 'logical'\nmax_replication_slots = 4\nmax_wal_senders = 4\n" + ) + node.start() + _psql_command( + node, + "CREATE ROLE regress_alice WITH LOGIN;\n" + "CREATE ROLE regress_mallory WITH LOGIN;\n" + "CREATE TABLE user_logins(id serial, who text);\n" + "GRANT SELECT ON user_logins TO public;\n", + 0, + "create tmp objects", + log_exact="", + err_exact="", + ) + _psql_command( + node, + "CREATE FUNCTION on_login_proc() RETURNS event_trigger AS $$\nBEGIN\n" + " INSERT INTO user_logins (who) VALUES (SESSION_USER);\n" + " IF SESSION_USER = 'regress_mallory' THEN\n" + " RAISE EXCEPTION 'Hello %! 
You are NOT welcome here!', SESSION_USER;\n" + " END IF;\n" + " RAISE NOTICE 'Hello %! You are welcome!', SESSION_USER;\nEND;\n" + "$$ LANGUAGE plpgsql SECURITY DEFINER;\n", + 0, + "create trigger function", + log_exact="", + err_exact="", + ) + _psql_command( + node, + "CREATE EVENT TRIGGER on_login_trigger ON login EXECUTE PROCEDURE " + "on_login_proc();", + 0, + "create event trigger", + log_exact="", + err_exact="", + ) + _psql_command( + node, + "ALTER EVENT TRIGGER on_login_trigger ENABLE ALWAYS;", + 0, + "alter event trigger", + log_exact="", + err_like=[r"You are welcome"], + ) + _psql_command( + node, + "SELECT COUNT(*) FROM user_logins;", + 0, + "select count", + log_exact="2", + err_like=[r"You are welcome"], + ) + _psql_command( + node, + "SELECT 1;", + 0, + "try regress_alice", + connstr="user=regress_alice", + log_exact="1", + err_like=[r"You are welcome"], + err_unlike=[r"You are NOT welcome"], + ) + _psql_command( + node, + "SELECT * FROM user_logins;", + 0, + "select *", + log_like=[r"3\|regress_alice"], + log_unlike=[r"regress_mallory"], + err_like=[r"You are welcome"], + ) + _psql_command( + node, + "SELECT COUNT(*) FROM user_logins;", + 0, + "select count", + log_exact="5", + err_like=[r"You are welcome"], + ) + _psql_command( + node, + "DROP EVENT TRIGGER on_login_trigger;", + 0, + "drop event trigger", + log_exact="", + err_like=[r"You are welcome"], + ) + _psql_command( + node, + "DROP TABLE user_logins;\nDROP FUNCTION on_login_proc;\n" + "DROP ROLE regress_mallory;\nDROP ROLE regress_alice;\n", + 0, + "cleanup", + log_exact="", + err_exact="", + ) diff --git a/src/test/authentication/pyt/test_007_pre_auth.py b/src/test/authentication/pyt/test_007_pre_auth.py new file mode 100644 index 0000000000000..35ec3a5ae84fc --- /dev/null +++ b/src/test/authentication/pyt/test_007_pre_auth.py @@ -0,0 +1,51 @@ +# Copyright (c) 2024-2026, PostgreSQL Global Development Group + +"""Port of src/test/authentication/t/007_pre_auth.pl. 
+ +Connections that are still authenticating are visible in pg_stat_activity with +state 'starting' and wait_event 'init-pre-auth'; once authentication completes +they reach 'idle'. Uses an injection point to hold a backend in pre-auth. +Requires an injection-points build with the injection_points extension. +""" + +import os +import time + +import pytest + + +def test_007_pre_auth(create_pg): + """Authenticating backends appear in pg_stat_activity, then reach idle.""" + if os.environ.get("enable_injection_points") != "yes": + pytest.skip("Injection points not supported by this build") + node = create_pg("primary", start=False) + node.append_conf("\nlog_connections = 'receipt,authentication'\n") + node.start() + if not node.check_extension("injection_points"): + pytest.skip("Extension injection_points not installed") + node.safe_psql("CREATE EXTENSION injection_points") + psql = node.background_psql("postgres") + psql.query_safe("SELECT injection_points_attach('init-pre-auth', 'wait')") + conn = node.background_psql("postgres", wait=False) + pid = "" + while pid == "": + pid = psql.query( + "SELECT pid FROM pg_stat_activity\n" + " WHERE backend_type = 'client backend'\n" + " AND state = 'starting'\n" + " AND wait_event = 'init-pre-auth';" + ) + if pid == "": + time.sleep(0.1) + psql.query_safe("SELECT injection_points_wakeup('init-pre-auth');") + conn.wait_connect() + state = "" + while state != "idle": + state = psql.query( + "SELECT state FROM pg_stat_activity WHERE pid = {};".format(pid) + ) + if state != "idle": + time.sleep(0.1) + psql.query_safe("SELECT injection_points_detach('init-pre-auth');") + psql.quit() + conn.quit() diff --git a/src/test/kerberos/meson.build b/src/test/kerberos/meson.build index 11aa732e69bf9..1e27e1e2a0904 100644 --- a/src/test/kerberos/meson.build +++ b/src/test/kerberos/meson.build @@ -14,4 +14,15 @@ tests += { 'with_krb_srvnam': 'postgres', }, }, + 'pytest': { + 'test_kwargs': {'priority': 40}, # kerberos tests are slow, start 
early + 'tests': [ + 'pyt/test_001_auth.py', + ], + 'env': { + 'with_gssapi': gssapi.found() ? 'yes' : 'no', + 'with_krb_srvnam': 'postgres', + 'enable_injection_points': get_option('injection_points') ? 'yes' : 'no', + }, + }, } diff --git a/src/test/kerberos/pyt/test_001_auth.py b/src/test/kerberos/pyt/test_001_auth.py new file mode 100644 index 0000000000000..ac1f34797f1de --- /dev/null +++ b/src/test/kerberos/pyt/test_001_auth.py @@ -0,0 +1,716 @@ +# Copyright (c) 2021-2026, PostgreSQL Global Development Group + +# pylint: disable=line-too-long +"""Port of src/test/kerberos/t/001_auth.pl. + +Sets up a KDC and then runs a variety of tests to make sure that the +GSSAPI/Kerberos authentication and encryption are working properly, that the +options in pg_hba.conf and pg_ident.conf are handled correctly, that the +server-side pg_stat_gssapi view reports what we expect to see for each test and +that SYSTEM_USER returns what we expect to see. + +Also tests that GSSAPI delegation is working properly and that those +credentials can be used to make dblink / postgres_fdw connections. + +Since this requires setting up a full KDC, it doesn't make much sense to have +multiple test scripts (since they'd have to also create their own KDC and that +could cause race conditions or other problems) -- so just add whatever other +tests are needed to here. +""" + +import os +import pathlib +import re +import stat + +import pytest + +import pypg +from pypg import KerberosServer + +# This suite stands up a full KDC and opens local TCP ports, so it is gated +# behind PG_TEST_EXTRA=kerberos, exactly like the Perl 001_auth.pl plan check. +pytestmark = pypg.require_test_extras("kerberos") + +# psql inherits PGAPPNAME, which the server logs as application_name. The Perl +# harness sets PGAPPNAME to basename($0); mirror that with this file's name so +# the "connection authorized: ... application_name=..." log assertions match. 
+APPLICATION = "test_001_auth.py" + +DBNAME = "postgres" +USERNAME = "test1" +HOST = "auth-test-localhost.postgresql.example.com" +HOSTADDR = "127.0.0.1" +REALM = "EXAMPLE.COM" +TEST1_PASSWORD = "secret1" + + +def _skip_if_no_gssapi(): + """Skip unless the build links GSSAPI (mirrors the with_gssapi plan check).""" + if os.environ.get("with_gssapi") != "yes": + pytest.skip("GSSAPI/Kerberos not supported by this build") + + +@pytest.fixture(scope="module") +def pgpass(tmp_check): + """A .pgpass file that must never be used (mirrors the Perl $pgpass setup). + + It is deliberately filled with a wrong password so that any code path which + accidentally falls back to it is caught. + """ + path = tmp_check / ".pgpass" + pypg.append_to_file(path, "*:*:*:*:abc123") + os.chmod(path, stat.S_IRUSR | stat.S_IWUSR) + return path + + +@pytest.fixture(scope="module") +def krb(tmp_check, datadir): + """Stand up the KDC, add the test1 principal, and tear it down at the end. + + Mirrors `PostgreSQL::Test::Kerberos->new` plus the test1 principal creation + done at the top of 001_auth.pl. The log directory mirrors the Perl + log_path; here it lives under the data directory's parent. + """ + _skip_if_no_gssapi() + log_path = pathlib.Path(datadir).parent / "log" + log_path.mkdir(parents=True, exist_ok=True) + server = KerberosServer.setup(tmp_check, log_path, HOST, HOSTADDR, REALM) + server.create_principal("test1", TEST1_PASSWORD) + try: + yield server + finally: + server.stop() + + +@pytest.fixture(scope="module") +def node(krb, create_pg_module): + """Initialize and start the PostgreSQL server configured for GSSAPI. + + Mirrors the "setting up PostgreSQL instance" block: listen on the test + interface, point krb_server_keyfile at the KDC-issued service keytab, and + enable verbose connection logging so the log assertions can fire. + """ + pg = create_pg_module("node", start=False) + # psql inherits PGAPPNAME as its application_name; the Perl harness sets it + # to basename($0). 
Set it here so the connection-authorized log lines name + # this test file, matching the application_name assertions. + os.environ["PGAPPNAME"] = APPLICATION + pg.append_conf( + "\n".join( + [ + "listen_addresses = '{}'".format(HOSTADDR), + "krb_server_keyfile = '{}'".format(krb.keytab), + "log_connections = all", + "log_min_messages = debug2", + "lc_messages = 'C'", + ] + ) + ) + pg.start() + return pg + + +@pytest.fixture(scope="module") +def setup_sql(node, pgpass): + """Create the users, extensions, foreign servers and tables under test. + + Mirrors the long sequence of safe_psql() statements after server start. + Skips the whole module if postgres_fdw / dblink are not available, since + the delegation tests depend on them (the Perl script assumes contrib). + """ + port = node.port + for ext in ("postgres_fdw", "dblink"): + if not node.check_extension(ext): + pytest.skip("{} contrib extension is required".format(ext)) + + node.safe_psql("CREATE USER test1;") + node.safe_psql("CREATE USER test2 WITH ENCRYPTED PASSWORD 'abc123';") + node.safe_psql("CREATE EXTENSION postgres_fdw;") + node.safe_psql("CREATE EXTENSION dblink;") + node.safe_psql( + "CREATE SERVER s1 FOREIGN DATA WRAPPER postgres_fdw OPTIONS " + "(host '{host}', hostaddr '{addr}', port '{port}', dbname 'postgres');".format( + host=HOST, addr=HOSTADDR, port=port + ) + ) + node.safe_psql( + "CREATE SERVER s2 FOREIGN DATA WRAPPER postgres_fdw OPTIONS " + "(port '{port}', dbname 'postgres', passfile '{pgpass}');".format( + port=port, pgpass=pgpass + ) + ) + node.safe_psql("GRANT USAGE ON FOREIGN SERVER s1 TO test1;") + node.safe_psql("CREATE USER MAPPING FOR test1 SERVER s1 OPTIONS (user 'test1');") + node.safe_psql("CREATE USER MAPPING FOR test1 SERVER s2 OPTIONS (user 'test2');") + node.safe_psql("CREATE TABLE t1 (c1 int);") + node.safe_psql("INSERT INTO t1 VALUES (1);") + node.safe_psql( + "CREATE FOREIGN TABLE tf1 (c1 int) SERVER s1 OPTIONS " + "(schema_name 'public', table_name 't1');" + ) + 
node.safe_psql("GRANT SELECT ON t1 TO test1;") + node.safe_psql("GRANT SELECT ON tf1 TO test1;") + node.safe_psql( + "CREATE FOREIGN TABLE tf2 (c1 int) SERVER s2 OPTIONS " + "(schema_name 'public', table_name 't1');" + ) + node.safe_psql("GRANT SELECT ON tf2 TO test1;") + node.safe_psql( + "CREATE TABLE ids (id) AS SELECT 'gss:test1@{realm}' " + "FROM generate_series(1, 10);".format(realm=REALM) + ) + node.safe_psql("GRANT SELECT ON ids TO public;") + + +def _connstr(node, role, gssencmode): + """Build the TCP+GSS connection string used by every test_access/query. + + Mirrors `$node->connstr('postgres') . " user=$role host=$host + hostaddr=$hostaddr $gssencmode"`. Later keywords override earlier ones in + libpq, so the appended host/hostaddr replace the socket host in connstr(). + """ + base = node.connstr("postgres") + return "{base} user={role} host={host} hostaddr={addr} {mode}".format( + base=base, role=role, host=HOST, addr=HOSTADDR, mode=gssencmode + ).rstrip() + + +def _test_access(node, role, query, expected_res, gssencmode, test_name, log_msgs=None): + """Connect over TCP/IP for Kerberos; assert success/failure + log lines. + + Mirrors the Perl test_access(): on expected_res==0 the connection must + succeed and the query must return SQL true ("t"); otherwise it must fail. + log_msgs are matched literally against the server log emitted during the + attempt. + """ + connstr = _connstr(node, role, gssencmode) + log_like = [re.escape(m) for m in log_msgs] if log_msgs else None + if expected_res == 0: + node.connect_ok( + connstr, + test_name, + sql=query, + expected_stdout=r"^t$", + log_like=log_like, + ) + else: + node.connect_fails(connstr, test_name, log_like=log_like) + + +def _test_query(node, role, query, expected, gssencmode, test_name): + """Connect over TCP/IP and assert an arbitrary query result. + + Mirrors the Perl test_query(): the connection must succeed and stdout must + match the expected regex. 
+ """ + connstr = _connstr(node, role, gssencmode) + node.connect_ok(connstr, test_name, sql=query, expected_stdout=expected) + + +def _reset_hba(node, lines): + """Replace pg_hba.conf with the given lines and restart (mirrors the Perl). + + The Perl script unlinks pg_hba.conf and writes fresh contents before each + HBA scenario, then restarts. We rewrite the file in place to the same + effect. + """ + hba = pathlib.Path(node.datadir) / "pg_hba.conf" + with open(hba, "w", encoding="utf-8") as fh: + fh.write("\n".join(lines) + "\n") + node.restart() + + +STAT_NOT_DELEGATED = ( + "SELECT gss_authenticated AND encrypted AND NOT credentials_delegated " + "FROM pg_stat_gssapi WHERE pid = pg_backend_pid();" +) +STAT_DELEGATED = ( + "SELECT gss_authenticated AND encrypted AND credentials_delegated " + "FROM pg_stat_gssapi WHERE pid = pg_backend_pid();" +) +STAT_NOENC_DELEGATED = ( + "SELECT gss_authenticated AND NOT encrypted AND credentials_delegated " + "FROM pg_stat_gssapi WHERE pid = pg_backend_pid();" +) + + +def test_001_auth(node, krb, setup_sql, pgpass): + """Full GSSAPI/Kerberos auth, encryption, delegation and mapping scenarios.""" + port = node.port + + # --- host hba, no ident map yet ------------------------------------- + _reset_hba( + node, + [ + "local all test2 scram-sha-256", + "host all all {}/32 gss map=mymap".format(HOSTADDR), + ], + ) + + _test_access(node, "test1", "SELECT true", 2, "", "fails without ticket") + + krb.create_ticket("test1", TEST1_PASSWORD) + + _test_access( + node, + "test1", + "SELECT true", + 2, + "", + "fails without mapping", + log_msgs=[ + 'connection authenticated: identity="test1@{}" method=gss'.format(REALM), + 'no match in usermap "mymap" for user "test1"', + ], + ) + + node.append_conf("mymap /^(.*)@{}$ \\1".format(REALM), filename="pg_ident.conf") + node.restart() + + _test_access( + node, + "test1", + STAT_NOT_DELEGATED, + 0, + "", + "succeeds with mapping with default gssencmode and host hba, ticket not forwardable", + 
log_msgs=_auth_msgs("yes", "no"), + ) + _test_access( + node, + "test1", + STAT_NOT_DELEGATED, + 0, + "gssencmode=prefer", + "succeeds with GSS-encrypted access preferred with host hba, ticket not forwardable", + log_msgs=_auth_msgs("yes", "no"), + ) + _test_access( + node, + "test1", + STAT_NOT_DELEGATED, + 0, + "gssencmode=require", + "succeeds with GSS-encrypted access required with host hba, ticket not forwardable", + log_msgs=_auth_msgs("yes", "no"), + ) + _test_access( + node, + "test1", + STAT_NOT_DELEGATED, + 0, + "gssencmode=prefer gssdelegation=1", + "succeeds with GSS-encrypted access preferred with host hba and credentials not delegated even though asked for (ticket not forwardable)", + log_msgs=_auth_msgs("yes", "no"), + ) + _test_access( + node, + "test1", + STAT_NOT_DELEGATED, + 0, + "gssencmode=require gssdelegation=1", + "succeeds with GSS-encrypted access required with host hba and credentials not delegated even though asked for (ticket not forwardable)", + log_msgs=_auth_msgs("yes", "no"), + ) + + # Test that we can transport a reasonable amount of data. + _test_query( + node, + "test1", + "SELECT * FROM generate_series(1, 100000);", + r"^1\n(?:.*\n)*1024\n(?:.*\n)*9999\n(?:.*\n)*100000$", + "gssencmode=require", + "receiving 100K lines works", + ) + _test_query( + node, + "test1", + "CREATE TEMP TABLE mytab (f1 int primary key);\n" + "COPY mytab FROM STDIN;\n" + + "\n".join(str(i) for i in range(1, 100001)) + + "\n\\.\n" + + "SELECT COUNT(*) FROM mytab;", + r"^100000$", + "gssencmode=require", + "sending 100K lines works", + ) + + _require_auth_host(node) + + # Test that SYSTEM_USER works. + _test_query( + node, + "test1", + "SELECT SYSTEM_USER;", + r"^gss:test1@{}$".format(re.escape(REALM)), + "gssencmode=require", + "testing system_user", + ) + # Test that SYSTEM_USER works with parallel workers. 
+ _test_query( + node, + "test1", + "\n".join( + [ + "SET min_parallel_table_scan_size TO 0;", + "SET parallel_setup_cost TO 0;", + "SET parallel_tuple_cost TO 0;", + "SET max_parallel_workers_per_gather TO 2;", + "SELECT bool_and(SYSTEM_USER = id) FROM ids;", + ] + ), + r"^t$", + "gssencmode=require", + "testing system_user with parallel workers", + ) + + _hostgssenc_scenarios(node, krb, pgpass, port) + _hostnogssenc_scenarios(node, pgpass, port) + _include_realm_scenarios(node, port) + + +def _auth_msgs(encrypted, delegated): + """Return the unescaped expected-log fragments for a GSS connection.""" + authorized = ( + "connection authorized: user={user} database={db} " + "application_name={app} GSS (authenticated=yes, encrypted={enc}, " + "delegated_credentials={deleg}, principal=test1@{realm})".format( + user=USERNAME, + db=DBNAME, + app=APPLICATION, + enc=encrypted, + deleg=delegated, + realm=REALM, + ) + ) + return [ + 'connection authenticated: identity="test1@{}" method=gss'.format(REALM), + authorized, + ] + + +def _require_auth_host(node): + """require_auth=gss/sspi checks against the host hba (mirrors the Perl).""" + node.connect_ok( + _connstr(node, "test1", "gssencmode=disable require_auth=gss"), + "GSS authentication requested, works with non-encrypted GSS", + ) + node.connect_ok( + _connstr(node, "test1", "gssencmode=require require_auth=gss"), + "GSS authentication requested, works with encrypted GSS auth", + ) + node.connect_fails( + _connstr(node, "test1", "gssencmode=disable require_auth=sspi"), + "SSPI authentication requested, fails with non-encrypted GSS", + expected_stderr=r'authentication method requirement "sspi" failed: server requested GSSAPI authentication', + ) + node.connect_fails( + _connstr(node, "test1", "gssencmode=require require_auth=sspi"), + "SSPI authentication requested, fails with encrypted GSS", + expected_stderr=r'authentication method requirement "sspi" failed: server did not complete authentication', + ) + + +def 
_delegation_fails(node, sql, connstr, msg): + """Assert a dblink/postgres_fdw psql call fails (rc=3) without delegation. + + Mirrors the repeated $node->psql(...) blocks that expect exit code 3 and a + "password or GSSAPI delegated credentials required" stderr with empty + stdout. + """ + result = node.psql_capture(sql, connstr=connstr) + assert result.rc == 3, "{}: expected exit 3, got {}\n{}".format( + msg, result.rc, result.stderr + ) + assert re.search( + r"password or GSSAPI delegated credentials required", result.stderr + ), "{}: stderr was {!r}".format(msg, result.stderr) + assert re.search(r"^$", result.stdout), "{}: stdout was {!r}".format( + msg, result.stdout + ) + + +def _hostgssenc_scenarios(node, krb, pgpass, port): + """hostgssenc hba: forwardable ticket, gss_accept_delegation off/on.""" + _reset_hba( + node, + [ + " local all test2 scram-sha-256", + "\thostgssenc all all {}/32 gss map=mymap".format(HOSTADDR), + ], + ) + + # Re-create the ticket, with the forwardable flag set. 
+ krb.create_ticket("test1", TEST1_PASSWORD, forwardable=True) + + _test_access( + node, + "test1", + STAT_NOT_DELEGATED, + 0, + "gssencmode=prefer gssdelegation=1", + "succeeds with GSS-encrypted access preferred and hostgssenc hba and credentials not forwarded (server does not accept them, default)", + log_msgs=_auth_msgs("yes", "no"), + ) + _test_access( + node, + "test1", + STAT_NOT_DELEGATED, + 0, + "gssencmode=require gssdelegation=1", + "succeeds with GSS-encrypted access required and hostgssenc hba and credentials not forwarded (server does not accept them, default)", + log_msgs=_auth_msgs("yes", "no"), + ) + + node.append_conf("gss_accept_delegation=off") + node.restart() + + _test_access( + node, + "test1", + STAT_NOT_DELEGATED, + 0, + "gssencmode=prefer gssdelegation=1", + "succeeds with GSS-encrypted access preferred and hostgssenc hba and credentials not forwarded (server does not accept them, explicitly disabled)", + log_msgs=_auth_msgs("yes", "no"), + ) + _test_access( + node, + "test1", + STAT_NOT_DELEGATED, + 0, + "gssencmode=require gssdelegation=1", + "succeeds with GSS-encrypted access required and hostgssenc hba and credentials not forwarded (server does not accept them, explicitly disabled)", + log_msgs=_auth_msgs("yes", "no"), + ) + + node.append_conf("gss_accept_delegation=on") + node.restart() + + _test_access( + node, + "test1", + STAT_DELEGATED, + 0, + "gssencmode=prefer gssdelegation=1", + "succeeds with GSS-encrypted access preferred and hostgssenc hba and credentials forwarded", + log_msgs=_auth_msgs("yes", "yes"), + ) + _test_access( + node, + "test1", + STAT_DELEGATED, + 0, + "gssencmode=require gssdelegation=1", + "succeeds with GSS-encrypted access required and hostgssenc hba and credentials forwarded", + log_msgs=_auth_msgs("yes", "yes"), + ) + _test_access( + node, + "test1", + STAT_NOT_DELEGATED, + 0, + "gssencmode=prefer", + "succeeds with GSS-encrypted access preferred and hostgssenc hba and credentials not forwarded", + 
log_msgs=_auth_msgs("yes", "no"), + ) + _test_access( + node, + "test1", + STAT_NOT_DELEGATED, + 0, + "gssencmode=require gssdelegation=0", + "succeeds with GSS-encrypted access required and hostgssenc hba and credentials explicitly not forwarded", + log_msgs=_auth_msgs("yes", "no"), + ) + + require_connstr = "user=test1 host={host} hostaddr={addr} gssencmode=require gssdelegation=0".format( + host=HOST, addr=HOSTADDR + ) + _delegation_fails( + node, + "SELECT * FROM dblink('user=test1 dbname={db} host={host} hostaddr={addr} port={port}','select 1') as t1(c1 int);".format( + db=DBNAME, host=HOST, addr=HOSTADDR, port=port + ), + require_connstr, + "dblink attempt fails without delegated credentials", + ) + _delegation_fails( + node, + "SELECT * FROM dblink('user=test2 dbname={db} port={port} passfile={pgpass}','select 1') as t1(c1 int);".format( + db=DBNAME, port=port, pgpass=pgpass + ), + require_connstr, + "dblink does not work without delegated credentials and with passfile", + ) + _delegation_fails( + node, + "TABLE tf1;", + require_connstr, + "postgres_fdw does not work without delegated credentials", + ) + _delegation_fails( + node, + "TABLE tf2;", + require_connstr, + "postgres_fdw does not work without delegated credentials and with passfile", + ) + + _test_access( + node, + "test1", + "SELECT true", + 2, + "gssencmode=disable", + "fails with GSS encryption disabled and hostgssenc hba", + ) + + node.connect_ok( + _connstr(node, "test1", "gssencmode=require require_auth=gss"), + "GSS authentication requested, works with GSS encryption", + ) + node.connect_ok( + _connstr(node, "test1", "gssencmode=require require_auth=gss,scram-sha-256"), + "multiple authentication types requested, works with GSS encryption", + ) + + +def _hostnogssenc_scenarios(node, pgpass, port): + """hostnogssenc hba: delegated, unencrypted GSS connections.""" + _reset_hba( + node, + [ + " local all test2 scram-sha-256", + "\thostnogssenc all all {}/32 gss map=mymap".format(HOSTADDR), + ], 
+ ) + + _test_access( + node, + "test1", + STAT_NOENC_DELEGATED, + 0, + "gssencmode=prefer gssdelegation=1", + "succeeds with GSS-encrypted access preferred and hostnogssenc hba, but no encryption", + log_msgs=_auth_msgs("no", "yes"), + ) + _test_access( + node, + "test1", + "SELECT true", + 2, + "gssencmode=require", + "fails with GSS-encrypted access required and hostnogssenc hba", + ) + _test_access( + node, + "test1", + STAT_NOENC_DELEGATED, + 0, + "gssencmode=disable gssdelegation=1", + "succeeds with GSS encryption disabled and hostnogssenc hba", + log_msgs=_auth_msgs("no", "yes"), + ) + + _test_query( + node, + "test1", + "SELECT * FROM dblink('user=test1 dbname={db} host={host} hostaddr={addr} port={port}','select 1') as t1(c1 int);".format( + db=DBNAME, host=HOST, addr=HOSTADDR, port=port + ), + r"^1$", + "gssencmode=prefer gssdelegation=1", + "dblink works not-encrypted (server not configured to accept encrypted GSSAPI connections)", + ) + _test_query( + node, + "test1", + "TABLE tf1;", + r"^1$", + "gssencmode=prefer gssdelegation=1", + "postgres_fdw works not-encrypted (server not configured to accept encrypted GSSAPI connections)", + ) + + prefer_connstr = "user=test1 host={host} hostaddr={addr} gssencmode=prefer gssdelegation=1".format( + host=HOST, addr=HOSTADDR + ) + _delegation_fails( + node, + "SELECT * FROM dblink('user=test2 dbname={db} port={port} passfile={pgpass}','select 1') as t1(c1 int);".format( + db=DBNAME, port=port, pgpass=pgpass + ), + prefer_connstr, + "dblink does not work with delegated credentials and with passfile", + ) + _delegation_fails( + node, + "TABLE tf2;", + prefer_connstr, + "postgres_fdw does not work with delegated credentials and with passfile", + ) + + +def _include_realm_scenarios(node, port): + """include_realm=0 hba: delegated encrypted connections + krb_realm check.""" + pathlib.Path(node.datadir, "pg_ident.conf").write_text("", encoding="utf-8") + _reset_hba( + node, + [ + " local all test2 scram-sha-256", + 
"\thost all all {}/32 gss include_realm=0".format(HOSTADDR), + ], + ) + + _test_access( + node, + "test1", + STAT_DELEGATED, + 0, + "gssdelegation=1", + "succeeds with include_realm=0 and defaults", + log_msgs=_auth_msgs("yes", "yes"), + ) + + _test_query( + node, + "test1", + "SELECT * FROM dblink('user=test1 dbname={db} host={host} hostaddr={addr} port={port} password=1234','select 1') as t1(c1 int);".format( + db=DBNAME, host=HOST, addr=HOSTADDR, port=port + ), + r"^1$", + "gssencmode=require gssdelegation=1", + "dblink works encrypted", + ) + _test_query( + node, + "test1", + "TABLE tf1;", + r"^1$", + "gssencmode=require gssdelegation=1", + "postgres_fdw works encrypted", + ) + + # Reset pg_hba.conf, and cause a usermap failure with an authentication + # that has passed. + _reset_hba( + node, + [ + " local all test2 scram-sha-256", + "\thost all all {}/32 gss include_realm=0 krb_realm=EXAMPLE.ORG".format( + HOSTADDR + ), + ], + ) + + _test_access( + node, + "test1", + "SELECT true", + 2, + "", + "fails with wrong krb_realm, but still authenticates", + log_msgs=[ + 'connection authenticated: identity="test1@{}" method=gss'.format(REALM) + ], + ) diff --git a/src/test/ldap/meson.build b/src/test/ldap/meson.build index d8961e6c8d705..a62996cff43a9 100644 --- a/src/test/ldap/meson.build +++ b/src/test/ldap/meson.build @@ -14,4 +14,14 @@ tests += { 'with_ldap': ldap.found() ? 'yes' : 'no', }, }, + 'pytest': { + 'tests': [ + 'pyt/test_001_auth.py', + 'pyt/test_002_bindpasswd.py', + 'pyt/test_003_ldap_connection_param_lookup.py', + ], + 'env': { + 'with_ldap': ldap.found() ? 'yes' : 'no', + }, + }, } diff --git a/src/test/ldap/pyt/conftest.py b/src/test/ldap/pyt/conftest.py new file mode 100644 index 0000000000000..48ca53f6d3030 --- /dev/null +++ b/src/test/ldap/pyt/conftest.py @@ -0,0 +1,13 @@ +# Copyright (c) 2023-2026, PostgreSQL Global Development Group + +"""Shared fixtures for the LDAP pytest suite. 
+ +Imports the ``ldap_server`` factory fixture from the shared ``pypg.ldapserver`` +helper so it is discovered by the tests in this directory. The fixture is the +Python twin of using ``LdapServer->new`` in the Perl TAP tests. +""" + +# pylint: disable=unused-import +from pypg.ldapserver import ( # noqa: F401 + ldap_server_fixture as ldap_server, +) diff --git a/src/test/ldap/pyt/test_001_auth.py b/src/test/ldap/pyt/test_001_auth.py new file mode 100644 index 0000000000000..50e0ff5414d57 --- /dev/null +++ b/src/test/ldap/pyt/test_001_auth.py @@ -0,0 +1,386 @@ +# Copyright (c) 2021-2026, PostgreSQL Global Development Group + +"""Port of src/test/ldap/t/001_auth.pl. + +Exhaustive test of LDAP authentication via pg_hba.conf: simple bind, +search+bind, multiple servers, LDAP URLs, search filters (direct and embedded +in URLs), a diagnostic-message failure, and the TLS variants (StartTLS, LDAPS, +LDAPS via URL, and the rejected StartTLS+LDAPS combination). The LDAP server is +set up with anonymous auth. +""" + +import contextlib +import os + +# Patterns asserting an authenticated identity in the server log. 
+_AUTH_TEST1 = ( + r'connection authenticated: identity="uid=test1,dc=example,dc=net" method=ldap' +) +_AUTH_TEST2 = ( + r'connection authenticated: identity="uid=test2,dc=example,dc=net" method=ldap' +) +_NOT_AUTHENTICATED = r"connection authenticated:" + + +@contextlib.contextmanager +def _pgpassword(value): + """Temporarily set PGPASSWORD (mirrors the Perl ``$ENV{PGPASSWORD}``).""" + saved = os.environ.get("PGPASSWORD") + os.environ["PGPASSWORD"] = value + try: + yield + finally: + if saved is None: + os.environ.pop("PGPASSWORD", None) + else: + os.environ["PGPASSWORD"] = saved + + +def _test_access(node, role, expected_res, test_name, **params): + """Connect as role; assert success (0) or failure, mirroring test_access.""" + connstr = "user={}".format(role) + if expected_res == 0: + node.connect_ok(connstr, test_name, **params) + else: + # No checks of the error message, only the status code (plus any + # log_like/log_unlike params). + node.connect_fails(connstr, test_name, **params) + + +def _write_hba(node, line): + """Replace pg_hba.conf with a single line (mirrors unlink + append_conf).""" + hba = node.datadir / "pg_hba.conf" + hba.unlink() + node.append_conf(line, filename="pg_hba.conf") + + +def _setup_node(create_pg): + """Create and start the PostgreSQL instance with the test users.""" + node = create_pg("node", start=False) + node.append_conf("log_connections = all\n") + # Needed to allow connect_fails to inspect postmaster log: + node.append_conf("log_min_messages = debug2") + node.start() + node.safe_psql("CREATE USER test0;") + node.safe_psql("CREATE USER test1;") + node.safe_psql('CREATE USER "test2@example.net";') + return node + + +def _setup_ldap(ldap_server): + """Start the LDAP server and load the test data (anonymous auth).""" + ldap_rootpw = "secret" + ldap = ldap_server(ldap_rootpw, "anonymous") # use anonymous auth + ldap.ldapadd_file(os.path.join(os.path.dirname(__file__), "..", "authdata.ldif")) + 
ldap.ldapsetpw("uid=test1,dc=example,dc=net", "secret1") + ldap.ldapsetpw("uid=test2,dc=example,dc=net", "secret2") + return ldap + + +def test_001_auth(create_pg, ldap_server): + """Full matrix of LDAP authentication scenarios via pg_hba.conf.""" + ldap = _setup_ldap(ldap_server) + ( + ldap_server_host, + ldap_port, + ldaps_port, + ldap_url, + ldaps_url, + ldap_basedn, + ldap_rootdn, + ) = ldap.prop("server", "port", "s_port", "url", "s_url", "basedn", "rootdn") + + # don't bother to check the server's cert (though perhaps we should) + os.environ["LDAPTLS_REQCERT"] = "never" + + node = _setup_node(create_pg) + + ctx = { + "node": node, + "host": ldap_server_host, + "port": ldap_port, + "ldaps_port": ldaps_port, + "url": ldap_url, + "ldaps_url": ldaps_url, + "basedn": ldap_basedn, + "rootdn": ldap_rootdn, + } + + _simple_bind(ctx) + _search_bind(ctx) + _multiple_servers(ctx) + _ldap_urls(ctx) + _search_filters(ctx) + _search_filters_in_urls(ctx) + _diagnostic_message(ctx) + _tls(ctx) + + +def _simple_bind(ctx): + """Simple bind authentication with ldapprefix/ldapsuffix.""" + node = ctx["node"] + _write_hba( + node, + "local all all ldap ldapserver={host} ldapport={port} " + 'ldapprefix="uid=" ldapsuffix=",dc=example,dc=net"'.format(**ctx), + ) + node.restart() + + with _pgpassword("wrong"): + _test_access( + node, + "test0", + 2, + "simple bind authentication fails if user not found in LDAP", + log_unlike=[_NOT_AUTHENTICATED], + ) + _test_access( + node, + "test1", + 2, + "simple bind authentication fails with wrong password", + log_unlike=[_NOT_AUTHENTICATED], + ) + + with _pgpassword("secret1"): + _test_access( + node, + "test1", + 0, + "simple bind authentication succeeds", + log_like=[_AUTH_TEST1], + ) + # require_auth=password should complete successfully; other methods + # should fail. 
+ node.connect_ok( + "user=test1 require_auth=password", + "password authentication required, works with ldap auth", + ) + node.connect_fails( + "user=test1 require_auth=scram-sha-256", + "SCRAM authentication required, fails with ldap auth", + ) + + +def _search_bind(ctx): + """Search+bind authentication with ldapbasedn.""" + node = ctx["node"] + _write_hba( + node, + "local all all ldap ldapserver={host} ldapport={port} " + 'ldapbasedn="{basedn}"'.format(**ctx), + ) + node.restart() + + with _pgpassword("wrong"): + _test_access( + node, + "test0", + 2, + "search+bind authentication fails if user not found in LDAP", + ) + _test_access( + node, "test1", 2, "search+bind authentication fails with wrong password" + ) + with _pgpassword("secret1"): + _test_access( + node, + "test1", + 0, + "search+bind authentication succeeds", + log_like=[_AUTH_TEST1], + ) + + +def _multiple_servers(ctx): + """Search+bind authentication with two ldapserver entries.""" + node = ctx["node"] + _write_hba( + node, + 'local all all ldap ldapserver="{host} {host}" ldapport={port} ' + 'ldapbasedn="{basedn}"'.format(**ctx), + ) + node.restart() + + with _pgpassword("wrong"): + _test_access( + node, + "test0", + 2, + "search+bind authentication fails if user not found in LDAP", + ) + _test_access( + node, "test1", 2, "search+bind authentication fails with wrong password" + ) + with _pgpassword("secret1"): + _test_access(node, "test1", 0, "search+bind authentication succeeds") + + +def _ldap_urls(ctx): + """Simple bind and search+bind via ldapurl.""" + node = ctx["node"] + _write_hba( + node, + 'local all all ldap ldapurl="{url}" ldapprefix="uid=" ' + 'ldapsuffix=",dc=example,dc=net"'.format(**ctx), + ) + node.restart() + + with _pgpassword("wrong"): + _test_access( + node, + "test0", + 2, + "simple bind with LDAP URL authentication fails if user not found in LDAP", + ) + _test_access( + node, + "test1", + 2, + "simple bind with LDAP URL authentication fails with wrong password", + ) + with 
_pgpassword("secret1"): + _test_access( + node, "test1", 0, "simple bind with LDAP URL authentication succeeds" + ) + + _write_hba( + node, + 'local all all ldap ldapurl="{url}/{basedn}?uid?sub"'.format(**ctx), + ) + node.restart() + + with _pgpassword("wrong"): + _test_access( + node, + "test0", + 2, + "search+bind with LDAP URL authentication fails if user not found in LDAP", + ) + _test_access( + node, + "test1", + 2, + "search+bind with LDAP URL authentication fails with wrong password", + ) + with _pgpassword("secret1"): + _test_access( + node, "test1", 0, "search+bind with LDAP URL authentication succeeds" + ) + + +def _search_filters(ctx): + """ldapsearchfilter that matches by uid or mail.""" + node = ctx["node"] + _write_hba( + node, + "local all all ldap ldapserver={host} ldapport={port} " + 'ldapbasedn="{basedn}" ' + 'ldapsearchfilter="(|(uid=$username)(mail=$username))"'.format(**ctx), + ) + node.restart() + + with _pgpassword("secret1"): + _test_access( + node, "test1", 0, "search filter finds by uid", log_like=[_AUTH_TEST1] + ) + with _pgpassword("secret2"): + _test_access( + node, + "test2@example.net", + 0, + "search filter finds by mail", + log_like=[_AUTH_TEST2], + ) + + +def _search_filters_in_urls(ctx): + """ldapsearchfilter embedded in an ldapurl, then combined with the option.""" + node = ctx["node"] + _write_hba( + node, + "local all all ldap " + 'ldapurl="{url}/{basedn}??sub?(|(uid=$username)(mail=$username))"'.format( + **ctx + ), + ) + node.restart() + + with _pgpassword("secret1"): + _test_access(node, "test1", 0, "search filter finds by uid") + with _pgpassword("secret2"): + _test_access(node, "test2@example.net", 0, "search filter finds by mail") + + # This is not documented: You can combine ldapurl and other ldap* + # settings. ldapurl is always parsed first, then the other settings + # override. It might be useful in a case like this. 
+ _write_hba( + node, + 'local all all ldap ldapurl="{url}/{basedn}??sub" ' + 'ldapsearchfilter="(|(uid=$username)(mail=$username))"'.format(**ctx), + ) + node.restart() + + with _pgpassword("secret1"): + _test_access(node, "test1", 0, "combined LDAP URL and search filter") + + +def _diagnostic_message(ctx): + """Bad ldapprefix with a question mark triggers a diagnostic message.""" + node = ctx["node"] + _write_hba( + node, + "local all all ldap ldapserver={host} ldapport={port} " + 'ldapprefix="?uid=" ldapsuffix=""'.format(**ctx), + ) + node.restart() + + with _pgpassword("secret1"): + _test_access(node, "test1", 2, "any attempt fails due to bad search pattern") + + +def _tls(ctx): + """StartTLS, LDAPS, LDAPS via URL, and the bad StartTLS+LDAPS combination.""" + node = ctx["node"] + + # request StartTLS with ldaptls=1 + _write_hba( + node, + "local all all ldap ldapserver={host} ldapport={port} " + 'ldapbasedn="{basedn}" ldapsearchfilter="(uid=$username)" ' + "ldaptls=1".format(**ctx), + ) + node.restart() + with _pgpassword("secret1"): + _test_access(node, "test1", 0, "StartTLS") + + # request LDAPS with ldapscheme=ldaps + _write_hba( + node, + "local all all ldap ldapserver={host} ldapscheme=ldaps " + 'ldapport={ldaps_port} ldapbasedn="{basedn}" ' + 'ldapsearchfilter="(uid=$username)"'.format(**ctx), + ) + node.restart() + with _pgpassword("secret1"): + _test_access(node, "test1", 0, "LDAPS") + + # request LDAPS with ldapurl=ldaps://... 
+ _write_hba( + node, + "local all all ldap " + 'ldapurl="{ldaps_url}/{basedn}??sub?(uid=$username)"'.format(**ctx), + ) + node.restart() + with _pgpassword("secret1"): + _test_access(node, "test1", 0, "LDAPS with URL") + + # bad combination of LDAPS and StartTLS + _write_hba( + node, + "local all all ldap " + 'ldapurl="{ldaps_url}/{basedn}??sub?(uid=$username)" ldaptls=1'.format(**ctx), + ) + node.restart() + with _pgpassword("secret1"): + _test_access(node, "test1", 2, "bad combination of LDAPS and StartTLS") diff --git a/src/test/ldap/pyt/test_002_bindpasswd.py b/src/test/ldap/pyt/test_002_bindpasswd.py new file mode 100644 index 0000000000000..686dad40a2bd9 --- /dev/null +++ b/src/test/ldap/pyt/test_002_bindpasswd.py @@ -0,0 +1,97 @@ +# Copyright (c) 2023-2026, PostgreSQL Global Development Group + +"""Port of src/test/ldap/t/002_bindpasswd.pl. + +Exercises LDAP search+bind authentication with the ldapbindpasswd option +against an LDAP server set up with 'users' (non-anonymous) auth: a wrong +ldapbindpasswd must fail, the correct one must succeed. +""" + +import os + +import pytest + + +def _test_access(node, role, expected_res, test_name): + """Connect as role; assert success (0) or failure, mirroring test_access.""" + connstr = "user={}".format(role) + if expected_res == 0: + node.connect_ok(connstr, test_name) + else: + # No checks of the error message, only the status code. 
+ node.connect_fails(connstr, test_name) + + +def test_002_bindpasswd(create_pg, ldap_server): + """search+bind with ldapbindpasswd: wrong fails, correct succeeds.""" + ldap_rootpw = "secret" + ldap = ldap_server(ldap_rootpw, "users") # no anonymous auth + ldap.ldapadd_file(os.path.join(os.path.dirname(__file__), "..", "authdata.ldif")) + ldap.ldapsetpw("uid=test1,dc=example,dc=net", "secret1") + ldap.ldapsetpw("uid=test2,dc=example,dc=net", "secret2") + + ldap_server_host, ldap_port, ldap_basedn, ldap_rootdn = ldap.prop( + "server", "port", "basedn", "rootdn" + ) + + node = create_pg("node", start=False) + node.append_conf("log_connections = all\n") + node.start() + + node.safe_psql("CREATE USER test0;") + node.safe_psql("CREATE USER test1;") + node.safe_psql('CREATE USER "test2@example.net";') + + old_pgpassword = os.environ.get("PGPASSWORD") + try: + # Note: this hba line preserves the deliberately malformed quoting from + # the Perl original (a missing close-quote after $ldap_rootdn), which is + # why authentication fails here. 
+ _write_hba( + node, + 'local all all ldap ldapserver={} ldapport={} ldapbasedn="{}" ' + 'ldapbinddn="{} ldapbindpasswd=wrong'.format( + ldap_server_host, ldap_port, ldap_basedn, ldap_rootdn + ), + ) + node.restart() + + os.environ["PGPASSWORD"] = "secret1" + _test_access( + node, + "test1", + 2, + "search+bind authentication fails with wrong ldapbindpasswd", + ) + + _write_hba( + node, + 'local all all ldap ldapserver={} ldapport={} ldapbasedn="{}" ' + 'ldapbinddn="{}" ldapbindpasswd="{}"'.format( + ldap_server_host, ldap_port, ldap_basedn, ldap_rootdn, ldap_rootpw + ), + ) + node.restart() + + _test_access( + node, + "test1", + 0, + "search+bind authentication succeeds with ldapbindpasswd", + ) + finally: + if old_pgpassword is None: + os.environ.pop("PGPASSWORD", None) + else: + os.environ["PGPASSWORD"] = old_pgpassword + + +def _write_hba(node, line): + """Replace pg_hba.conf with a single line (mirrors unlink + append_conf).""" + hba = node.datadir / "pg_hba.conf" + hba.unlink() + node.append_conf(line, filename="pg_hba.conf") + + +if __name__ == "__main__": + raise SystemExit(pytest.main([__file__, "-v"])) diff --git a/src/test/ldap/pyt/test_003_ldap_connection_param_lookup.py b/src/test/ldap/pyt/test_003_ldap_connection_param_lookup.py new file mode 100644 index 0000000000000..c90aede4f604a --- /dev/null +++ b/src/test/ldap/pyt/test_003_ldap_connection_param_lookup.py @@ -0,0 +1,193 @@ +# Copyright (c) 2025-2026, PostgreSQL Global Development Group + +"""Port of src/test/ldap/t/003_ldap_connection_param_lookup.pl. + +Tests connection-parameter lookup through an LDAP-backed pg_service.conf, +covering combinations of the service name, the PGSERVICE/PGSERVICEFILE/ +PGSYSCONFDIR environment variables, and a default pg_service.conf in +PGSYSCONFDIR. The service entry stores an LDAP URL whose directory lookup +returns the running server's host and port. 
+""" + +import contextlib +import os +import shutil + +from pypg.util import append_to_file + + +@contextlib.contextmanager +def _env(**overrides): + """Temporarily set/unset environment variables (mirrors Perl ``local``). + + A value of None removes the variable for the duration of the block. + """ + saved = {key: os.environ.get(key) for key in overrides} + try: + for key, value in overrides.items(): + if value is None: + os.environ.pop(key, None) + else: + os.environ[key] = value + yield + finally: + for key, value in saved.items(): + if value is None: + os.environ.pop(key, None) + else: + os.environ[key] = value + + +def test_003_ldap_connection_param_lookup(create_pg, ldap_server, tmp_path): + """Service-name/file lookups resolve a connection via an LDAP service entry.""" + dummy_node = create_pg("dummy_node", start=False) + + node = create_pg("node", start=False) + node.start() + + ldap_rootpw = "secret" + ldap = ldap_server(ldap_rootpw, "anonymous") # use anonymous auth + ldap_dir = os.path.dirname(__file__) + ldap.ldapadd_file(os.path.join(ldap_dir, "..", "authdata.ldif")) + ldap.ldapsetpw("uid=test1,dc=example,dc=net", "secret1") + ldap.ldapsetpw("uid=test2,dc=example,dc=net", "secret2") + + td = tmp_path + + # Create ldap file based on postgres connection info. 
def _run_service_tests(dummy_node, td, ldap_port):
    """Write the pg_service files under ``td`` and run every lookup scenario.

    Args:
        dummy_node: Node whose ``connect_ok``/``connect_fails`` helpers issue
            the connection attempts.
        td: Directory (supports ``/`` joining) that receives the service files
            and is also exported as PGSYSCONFDIR.
        ldap_port: Port embedded in the LDAP URL of the ``my_srv`` service.
    """
    # The only valid service entry is an LDAP URL, i.e. a decomposed
    # connection string that is fetched from the directory.
    ldap_url = (
        "ldap://localhost:{}/dc=example,dc=net?description?one?"
        "(cn=mydatabase)".format(ldap_port)
    )
    valid_path = td / "pg_service_valid.conf"
    append_to_file(valid_path, "\n[my_srv]\n" + ldap_url + "\n")

    # An empty file is the default PGSERVICEFILE so that no home-directory
    # lookup is ever attempted.
    empty_path = td / "pg_service_empty.conf"
    append_to_file(empty_path, "")

    # This path is intentionally never created.
    missing_path = td / "pg_service_missing.conf"

    # PGSYSCONFDIR is the fallback lookup directory; PGSERVICEFILE is forced
    # to a harmless default for everything that runs below.
    baseline = {"PGSYSCONFDIR": str(td), "PGSERVICEFILE": str(empty_path)}
    with _env(**baseline):
        _check_valid_service_file(dummy_node, valid_path)
        _check_missing_service_file(dummy_node, missing_path)
        _check_default_service_file(dummy_node, valid_path, td)


def _check_valid_service_file(dummy_node, srvfile_valid):
    """Exercise service-name lookups against a valid PGSERVICEFILE."""
    # (connection string, test description, marker echoed back by the query)
    accepted = (
        (
            "service=my_srv",
            'connection with correct "service" string and PGSERVICEFILE',
            "connect1_1",
        ),
        (
            "postgres://?service=my_srv",
            'connection with correct "service" URI and PGSERVICEFILE',
            "connect1_2",
        ),
    )
    with _env(PGSERVICEFILE=str(srvfile_valid)):
        for connstr, description, marker in accepted:
            dummy_node.connect_ok(
                connstr,
                description,
                sql="SELECT '{}'".format(marker),
                expected_stdout=marker,
            )
        dummy_node.connect_fails(
            "service=undefined-service",
            'connection with incorrect "service" string and PGSERVICEFILE',
            expected_stderr=r'definition of service "undefined-service" not found',
        )
        with _env(PGSERVICE="my_srv"):
            dummy_node.connect_ok(
                "",
                "connection with correct PGSERVICE and PGSERVICEFILE",
                sql="SELECT 'connect1_3'",
                expected_stdout="connect1_3",
            )
        with _env(PGSERVICE="undefined-service"):
            # No stderr pattern is given, so only the failure itself is
            # asserted here.
            dummy_node.connect_fails(
                "",
                "connection with incorrect PGSERVICE and PGSERVICEFILE",
            )


def _check_missing_service_file(dummy_node, srvfile_missing):
    """Exercise a PGSERVICEFILE that points at a file that does not exist."""
    description = (
        'connection with correct "service" string and incorrect PGSERVICEFILE'
    )
    with _env(PGSERVICEFILE=str(srvfile_missing)):
        dummy_node.connect_fails(
            "service=my_srv",
            description,
            expected_stderr=r'service file ".*pg_service_missing.conf" not found',
        )


def _check_default_service_file(dummy_node, srvfile_valid, td):
    """Exercise lookups through a pg_service.conf placed in PGSYSCONFDIR."""
    # (connection string, test description, marker echoed back by the query)
    accepted = (
        (
            "service=my_srv",
            'connection with correct "service" string and pg_service.conf',
            "connect2_1",
        ),
        (
            "postgres://?service=my_srv",
            'connection with correct "service" URI and default pg_service.conf',
            "connect2_2",
        ),
    )
    # The default file is a plain copy of the valid service file.
    default_path = td / "pg_service.conf"
    shutil.copyfile(srvfile_valid, default_path)
    try:
        for connstr, description, marker in accepted:
            dummy_node.connect_ok(
                connstr,
                description,
                sql="SELECT '{}'".format(marker),
                expected_stdout=marker,
            )
        dummy_node.connect_fails(
            "service=undefined-service",
            'connection with incorrect "service" string and default pg_service.conf',
            expected_stderr=r'definition of service "undefined-service" not found',
        )
        with _env(PGSERVICE="my_srv"):
            dummy_node.connect_ok(
                "",
                "connection with correct PGSERVICE and default pg_service.conf",
                sql="SELECT 'connect2_3'",
                expected_stdout="connect2_3",
            )
        with _env(PGSERVICE="undefined-service"):
            # Only the failure itself is asserted (no stderr pattern).
            dummy_node.connect_fails(
                "",
                "connection with incorrect PGSERVICE and default pg_service.conf",
            )
    finally:
        # Remove the copy so it cannot leak into later scenarios.
        default_path.unlink()
# The four schema files slapd.conf includes, mirroring LdapServer.pm.
_REQUIRED_SCHEMAS = ("core", "cosine", "nis", "inetorgperson")


def _schema_dir_has_all(directory: pathlib.Path) -> bool:
    """Return True if directory contains every required .schema file."""
    return all((directory / (name + ".schema")).is_file() for name in _REQUIRED_SCHEMAS)


def _first_usable_layout(
    candidates: List[Tuple[str, str]],
) -> Tuple[Optional[str], Optional[pathlib.Path]]:
    """Return the first (slapd, schema_dir) candidate that is fully usable.

    A candidate is usable only when the slapd binary exists AND the schema
    directory holds every required schema file. Requiring the binary here
    matters: a layout whose schema directory exists but whose slapd lives
    elsewhere must be rejected, otherwise the caller never reaches its generic
    fallback and detection fails even though OpenLDAP is installed.

    Args:
        candidates: ``(slapd_path, schema_dir)`` string pairs, in priority
            order.

    Returns:
        ``(slapd_path, schema_dir)`` for the first usable pair, or
        ``(None, None)`` when none qualifies.
    """
    for slapd, schema in candidates:
        if not os.path.isfile(slapd):
            continue
        schema_dir = pathlib.Path(schema)
        if schema_dir.is_dir() and _schema_dir_has_all(schema_dir):
            return slapd, schema_dir
    return None, None


def _detect_from_known_paths() -> Tuple[Optional[str], Optional[pathlib.Path]]:
    """Find slapd and the schema dir from LdapServer.pm's OS-specific paths.

    Returns (slapd_path, schema_dir) or (None, None) if no known layout matches.
    A layout only matches when both the binary and a complete schema directory
    are present.
    """
    system = platform.system()
    candidates: List[Tuple[str, str]] = []
    if system == "Darwin":
        candidates = [
            (
                "/opt/homebrew/opt/openldap/libexec/slapd",
                "/opt/homebrew/etc/openldap/schema",
            ),
            ("/usr/local/opt/openldap/libexec/slapd", "/usr/local/etc/openldap/schema"),
            ("/opt/local/libexec/slapd", "/opt/local/etc/openldap/schema"),
        ]
    elif system == "Linux":
        candidates = [
            ("/usr/sbin/slapd", "/etc/ldap/schema"),
            ("/usr/sbin/slapd", "/etc/openldap/schema"),
        ]
    elif system == "FreeBSD":
        candidates = [("/usr/local/libexec/slapd", "/usr/local/etc/openldap/schema")]
    elif system == "OpenBSD":
        candidates = [
            ("/usr/local/libexec/slapd", "/usr/local/share/examples/openldap/schema")
        ]
    return _first_usable_layout(candidates)


def _detect_generic() -> Tuple[Optional[str], Optional[pathlib.Path]]:
    """Find slapd via PATH/well-known dirs and the schema dir near its prefix.

    Covers installations (e.g. Nix) whose schema files live under
    ``PREFIX/etc/schema`` (PREFIX being the slapd install prefix) instead of
    the OS-standard locations.

    Returns (slapd_path, schema_dir) or (None, None) if nothing suitable is
    found.
    """
    slapd = shutil.which("slapd")
    if slapd is None:
        # slapd is often installed outside PATH; try the usual libexec dirs.
        for libexec in ("/usr/lib/openldap", "/usr/libexec/openldap"):
            cand = os.path.join(libexec, "slapd")
            if os.path.isfile(cand):
                slapd = cand
                break
    if slapd is None:
        return None, None

    # Walk up from the (symlink-resolved) slapd binary looking for a schema
    # directory containing the required *.schema files, e.g.
    # PREFIX/etc/schema, PREFIX/share/schema or PREFIX/etc/openldap/schema.
    base = pathlib.Path(slapd).resolve().parent
    search_roots = [base, base.parent, base.parent.parent]
    schema_subdirs = [
        ("etc", "schema"),
        ("etc", "openldap", "schema"),
        ("etc", "ldap", "schema"),
        ("share", "openldap", "schema"),
        ("share", "schema"),
    ]
    for root in search_roots:
        for parts in schema_subdirs:
            schema_cand = root.joinpath(*parts)
            if schema_cand.is_dir() and _schema_dir_has_all(schema_cand):
                return slapd, schema_cand
    return None, None


def detect_ldap() -> Tuple[bool, Optional[str], Optional[pathlib.Path], Optional[str]]:
    """Detect a usable slapd binary and OpenLDAP schema directory.

    Mirrors LdapServer.pm's INIT-phase detection (the OS-specific known paths)
    and falls back to locating slapd on PATH with a schema dir near its install
    prefix whenever no known layout is fully usable.

    Returns:
        A 4-tuple ``(setup, slapd, schema_dir, error)``. ``setup`` is True when
        a binary and schema dir were found; on failure ``error`` carries a
        human-readable reason, matching ``$LdapServer::setup_error``.
    """
    system = platform.system()
    if system not in ("Darwin", "Linux", "FreeBSD", "OpenBSD"):
        return False, None, None, "ldap tests not supported on {}".format(system)

    slapd, schema_dir = _detect_from_known_paths()
    if slapd is None or schema_dir is None:
        slapd, schema_dir = _detect_generic()

    # shutil.which() may hand back something that is not a regular file, so
    # the final existence check is kept for the generic result as well.
    if slapd is None or schema_dir is None or not os.path.isfile(slapd):
        return False, None, None, "OpenLDAP server installation not found"
    return True, slapd, schema_dir, None


# Module-level detection results, mirroring $LdapServer::setup / $setup_error.
setup, _SLAPD, _SCHEMA_DIR, setup_error = detect_ldap()

# All running servers, terminated by stop_all() (mirrors the Perl END block).
_SERVERS: "List[LdapServer]" = []
class LdapServer:
    """A running OpenLDAP server for testing pg_hba.conf ldap authentication.

    Mirrors the LdapServer.pm class: the constructor writes a slapd config,
    copies TLS certificates, starts slapd on freshly allocated ldap/ldaps
    ports, and waits until it accepts requests. Use ``ldapadd_file`` and
    ``ldapsetpw`` to populate the directory, ``prop`` to read settings, and
    ``stop`` to terminate it.

    slapd is started without ``-d`` and therefore detaches from the process
    that launched it; the server is consequently shut down through the pid
    file it writes, exactly as the Perl END block does.
    """

    def __init__(
        self, rootpw: str, authtype: str, *, testname: str, test_temp, log_dir
    ):
        """Create and start a new LDAP server.

        Args:
            rootpw: The rootdn password (used with the ldapbindpasswd option).
            authtype: Either ``'users'`` or ``'anonymous'`` (the slapd ACL
                ``by <authtype> auth`` clause).
            testname: A short name used in the slapd log filename.
            test_temp: A directory (path-like) to hold the server's data,
                config, certs, pid and password files.
            log_dir: Directory where the slapd log file is written.

        Raises:
            RuntimeError: If no slapd installation was detected, the CA
                certificate could not be copied, or slapd does not come up.
        """
        if not setup:
            raise RuntimeError("no suitable binaries found")

        test_temp = pathlib.Path(test_temp)
        self._test_temp = test_temp
        ldap_datadir = test_temp / "openldap-data"
        slapd_certs = test_temp / "slapd-certs"
        self._pidfile = test_temp / "slapd.pid"
        slapd_conf = test_temp / "slapd.conf"
        slapd_logfile = pathlib.Path(log_dir) / "slapd-{}.log".format(testname)

        self._server = "localhost"
        self._port = get_free_port()
        self._s_port = get_free_port()
        self._url = "ldap://{}:{}".format(self._server, self._port)
        self._s_url = "ldaps://{}:{}".format(self._server, self._s_port)
        self._basedn = "dc=example,dc=net"
        self._rootdn = "cn=Manager,dc=example,dc=net"
        self._rootpw = rootpw
        self._pwfile = test_temp / "ldappassword"
        self._process: Optional[subprocess.Popen] = None

        assert _SCHEMA_DIR is not None  # guaranteed by setup check above
        conf = (
            "include {schema}/core.schema\n"
            "include {schema}/cosine.schema\n"
            "include {schema}/nis.schema\n"
            "include {schema}/inetorgperson.schema\n"
            "\n"
            "pidfile {pidfile}\n"
            "logfile {logfile}\n"
            "\n"
            "access to *\n"
            " by * read\n"
            " by {authtype} auth\n"
            "\n"
            "database ldif\n"
            "directory {datadir}\n"
            "\n"
            "TLSCACertificateFile {certs}/ca.crt\n"
            "TLSCertificateFile {certs}/server.crt\n"
            "TLSCertificateKeyFile {certs}/server.key\n"
            "\n"
            'suffix "dc=example,dc=net"\n'
            'rootdn "{rootdn}"\n'
            'rootpw "{rootpw}"\n'
        ).format(
            schema=_SCHEMA_DIR,
            pidfile=self._pidfile,
            logfile=slapd_logfile,
            authtype=authtype,
            datadir=ldap_datadir,
            certs=slapd_certs,
            rootdn=self._rootdn,
            rootpw=self._rootpw,
        )
        append_to_file(slapd_conf, conf)

        ldap_datadir.mkdir()
        slapd_certs.mkdir()

        # The TLS material is borrowed from the SSL test suite's fixtures,
        # located three directories above this module under ssl/ssl.
        certdir = pathlib.Path(__file__).resolve().parent.parent.parent / "ssl" / "ssl"
        shutil.copyfile(certdir / "server_ca.crt", slapd_certs / "ca.crt")
        if not (slapd_certs / "ca.crt").is_file():
            raise RuntimeError("copying ca.crt (error unknown)")
        shutil.copyfile(certdir / "server-cn-only.crt", slapd_certs / "server.crt")
        shutil.copyfile(certdir / "server-cn-only.key", slapd_certs / "server.key")

        # Password file consumed by the client tools through "-y".
        append_to_file(self._pwfile, self._rootpw)
        os.chmod(self._pwfile, 0o600)

        self._start(slapd_conf)
        _SERVERS.append(self)

    def _start(self, slapd_conf: pathlib.Path) -> None:
        """Start slapd and poll with ldapsearch until it accepts requests.

        Raises:
            RuntimeError: If the slapd launcher exits with a failure status,
                or the server is still unreachable after 300 probes. In the
                latter case any server that did start is stopped first so it
                is not left behind.
        """
        assert _SLAPD is not None  # guaranteed by setup check
        # -s0 prevents log messages ending up in syslog.
        self._process = subprocess.Popen(  # pylint: disable=consider-using-with
            [
                _SLAPD,
                "-f",
                str(slapd_conf),
                "-s0",
                "-h",
                "{} {}".format(self._url, self._s_url),
            ]
        )
        retries = 0
        while True:
            # The launcher exits with 0 once slapd has detached; any other
            # status means slapd refused to start, so stop waiting right away
            # (the Perl module bails out the same way via system_or_bail).
            status = self._process.poll()
            if status is not None and status != 0:
                self._process = None
                raise RuntimeError(
                    "slapd failed to start (exit status {})".format(status)
                )
            probe = subprocess.run(
                [
                    "ldapsearch",
                    "-sbase",
                    "-H",
                    self._url,
                    "-b",
                    self._basedn,
                    "-D",
                    self._rootdn,
                    "-y",
                    str(self._pwfile),
                    "-n",
                    # Literal quotes kept as in the original probe command.
                    "'objectclass=*'",
                ],
                stdout=subprocess.DEVNULL,
                stderr=subprocess.DEVNULL,
                check=False,
            )
            if probe.returncode == 0:
                break
            retries += 1
            if retries >= 300:
                # Do not leave a half-started server running: this instance
                # has not been registered in _SERVERS yet, so nothing else
                # would ever stop it.
                self.stop()
                raise RuntimeError("cannot connect to slapd")
            eprint("# waiting for slapd to accept requests...")
            time.sleep(1)

    def _daemon_pid(self) -> Optional[int]:
        """Return the pid recorded in slapd's pid file, or None if unusable."""
        try:
            pid = int(self._pidfile.read_text(encoding="ascii").strip())
        except (OSError, ValueError):
            return None
        # Never hand 0 or a negative value to os.kill(): those address whole
        # process groups instead of a single process.
        return pid if pid > 0 else None

    def _ldapenv(self) -> dict:
        """Return a copy of the environment with LDAPURI/LDAPBINDDN set."""
        env = dict(os.environ)
        env["LDAPURI"] = self._url
        env["LDAPBINDDN"] = self._rootdn
        return env

    def ldapadd_file(self, path) -> None:
        """Add the contents of an LDIF file to the LDAP server.

        Args:
            path: Path to a file containing LDIF data to add.

        Raises:
            subprocess.CalledProcessError: If ``ldapadd`` fails.
        """
        subprocess.run(
            ["ldapadd", "-x", "-y", str(self._pwfile), "-f", str(path)],
            env=self._ldapenv(),
            check=True,
        )

    def ldapsetpw(self, user: str, password: str) -> None:
        """Set a user's password in the LDAP server (cf. ldapsetpw).

        Raises:
            subprocess.CalledProcessError: If ``ldappasswd`` fails.
        """
        subprocess.run(
            ["ldappasswd", "-x", "-y", str(self._pwfile), "-s", password, user],
            env=self._ldapenv(),
            check=True,
        )

    def prop(self, *names: str):
        """Return the values of the named properties (cf. LdapServer.pm prop).

        Recognized names: server, port, s_port, url, s_url, basedn, rootdn,
        pwfile.

        Returns:
            A tuple with one value per requested name, in the same order.

        Raises:
            KeyError: If a name is not one of the recognized properties.
        """
        mapping = {
            "server": self._server,
            "port": self._port,
            "s_port": self._s_port,
            "url": self._url,
            "s_url": self._s_url,
            "basedn": self._basedn,
            "rootdn": self._rootdn,
            "pwfile": self._pwfile,
        }
        return tuple(mapping[name] for name in names)

    @property
    def server(self) -> str:
        """The LDAP server hostname (localhost)."""
        return self._server

    @property
    def port(self) -> int:
        """The plaintext (ldap://) port."""
        return self._port

    @property
    def s_port(self) -> int:
        """The TLS (ldaps://) port."""
        return self._s_port

    @property
    def url(self) -> str:
        """The ldap:// URL."""
        return self._url

    @property
    def s_url(self) -> str:
        """The ldaps:// URL."""
        return self._s_url

    @property
    def basedn(self) -> str:
        """The directory base DN (dc=example,dc=net)."""
        return self._basedn

    @property
    def rootdn(self) -> str:
        """The directory root DN (cn=Manager,dc=example,dc=net)."""
        return self._rootdn

    def stop(self) -> None:
        """Terminate slapd if it is running (cf. the Perl END block).

        The launcher process is reaped (and terminated if it is somehow still
        alive), then the detached server is sent SIGINT using the pid from its
        pid file. Both steps are best-effort: a missing or malformed pid file
        and an already-exited process are silently tolerated.
        """
        import signal  # pylint: disable=import-outside-toplevel

        launcher = self._process
        self._process = None
        if launcher is not None and launcher.poll() is None:
            launcher.terminate()
            try:
                launcher.wait(timeout=30)
            except subprocess.TimeoutExpired:
                launcher.kill()
                launcher.wait()

        # slapd detaches at startup, so the process that is actually serving
        # requests is only reachable through the pid file.
        pid = self._daemon_pid()
        if pid is not None:
            try:
                os.kill(pid, signal.SIGINT)
            except OSError:
                pass  # already gone (or not ours to signal): nothing to do

        if self in _SERVERS:
            _SERVERS.remove(self)


def stop_all() -> None:
    """Stop every running LdapServer (mirrors the Perl END block)."""
    # Iterate over a copy: stop() removes the server from _SERVERS.
    for server in list(_SERVERS):
        server.stop()
def require_ldap_enabled() -> None:
    """Skip the calling test unless LDAP testing is possible and opted into.

    Three conditions are checked in order, mirroring the ``plan skip_all``
    checks at the top of the .pl tests: the ``with_ldap`` build flag must be
    ``yes``, ``ldap`` must be listed in PG_TEST_EXTRA, and a usable slapd
    installation must be detected.
    """
    build_flag = os.environ.get("with_ldap")
    if build_flag != "yes":
        pytest.skip("LDAP not supported by this build")

    enabled_extras = os.environ.get("PG_TEST_EXTRA", "").split()
    if "ldap" not in enabled_extras:
        pytest.skip("Potentially unsafe test LDAP not enabled in PG_TEST_EXTRA")

    detection = detect_ldap()
    found, reason = detection[0], detection[3]
    if not found:
        pytest.skip(reason or "OpenLDAP server installation not found")


@pytest.fixture(name="ldap_server")
def ldap_server_fixture(request, tmp_path_factory):
    """Yield a factory that starts LDAP servers; stop them after the test.

    The yielded callable takes ``(rootpw, authtype)`` (``authtype`` is
    ``'users'`` or ``'anonymous'``) and returns a running ``LdapServer``,
    mirroring ``LdapServer->new`` in the Perl suite. Every server gets a
    temp directory of its own, while all slapd logs of one test share a
    per-test log directory created up front.
    """
    require_ldap_enabled()

    testname = request.node.name
    log_dir = tmp_path_factory.mktemp("ldap-log-{}".format(testname))
    launched: List[LdapServer] = []

    def _launch(rootpw: str, authtype: str) -> LdapServer:
        workdir = tmp_path_factory.mktemp("ldap-{}".format(testname))
        instance = LdapServer(
            rootpw,
            authtype,
            testname=testname,
            test_temp=workdir,
            log_dir=log_dir,
        )
        launched.append(instance)
        return instance

    yield _launch

    # Teardown: stop what this test started, then sweep up anything else
    # that is still registered.
    for instance in launched:
        instance.stop()
    stop_all()
class OpenSSLBackend:
    """Mirror of SSL::Backend::OpenSSL.

    Copies the OpenSSL certificate fixtures into a cluster's data directory
    and renders the ssl_cert_file/ssl_key_file/... configuration fragments.
    """

    def __init__(self, key_tempdir: pathlib.Path):
        # Private directory receiving the permission-tightened client keys.
        self._key_tempdir = key_tempdir
        # Maps a client key file name to the path of its private copy.
        self._key: dict = {}
        self._library = "OpenSSL"

    def init(self, pgdata: pathlib.Path) -> None:
        """Install certificates, keys and CRLs into the cluster data dir.

        Server certs/keys, CA certs and CRLs go into ``pgdata`` (server keys
        are set to 0600). Client keys are copied into the private key
        directory with 0600, plus one deliberately 0644 copy of client.key
        used by the file-permission test.
        """
        data_dir = pathlib.Path(pgdata)

        for pattern in ("server-*.crt", "server-*.key"):
            self._copy_files(pattern, data_dir)
        for server_key in data_dir.glob("server-*.key"):
            os.chmod(server_key, 0o600)

        ca_and_crl_files = (
            "root+client_ca.crt",
            "root+server_ca.crt",
            "root_ca.crt",
            "root+client.crl",
        )
        for fixture in ca_and_crl_files:
            self._copy_files(fixture, data_dir)

        crl_directory = data_dir / "root+client-crldir"
        crl_directory.mkdir()
        self._copy_files("root+client-crldir/*", crl_directory)

        for key_name in _CLIENT_KEYS:
            private_copy = self._key_tempdir / key_name
            shutil.copyfile(_SSL_FILES_DIR / key_name, private_copy)
            os.chmod(private_copy, 0o600)
            self._key[key_name] = str(private_copy)

        # A world-readable copy of client.key, to test rejection of bad perms.
        loose_copy = self._key_tempdir / "client_wrongperms.key"
        shutil.copyfile(_SSL_FILES_DIR / "client.key", loose_copy)
        os.chmod(loose_copy, 0o644)
        self._key["client_wrongperms.key"] = str(loose_copy)

    def get_sslkey(self, keyfile: str) -> str:
        """Return an ' sslkey=<path>' connstr fragment for an installed key.

        Raises:
            KeyError: If ``keyfile`` was not installed by ``init``.
        """
        return " sslkey=" + "{}".format(self._key[keyfile])

    def set_server_cert(self, params: dict) -> str:
        """Return the sslconfig.conf body selecting the given cert/key/CRL.

        Mirrors SSL::Backend::OpenSSL->set_server_cert: ``cafile`` defaults to
        ``root+client_ca``, ``crlfile`` to ``root+client.crl`` and ``keyfile``
        to ``certfile``; an empty ``cafile`` yields ``ssl_ca_file=''``; an
        ``ssl_crl_dir`` line is emitted only for a non-None ``crldir``.

        Raises:
            KeyError: If ``params`` has no ``certfile`` entry.
        """
        ca_name = params.get("cafile", "root+client_ca")
        crl_name = params.get("crlfile", "root+client.crl")
        cert_name = params["certfile"]
        key_name = params.get("keyfile", cert_name)

        settings = [
            "ssl_cert_file='{}.crt'".format(cert_name),
            "ssl_key_file='{}.key'".format(key_name),
            "ssl_crl_file='{}'".format(crl_name),
        ]
        if cafile_is_empty(ca_name):
            settings.append("ssl_ca_file=''")
        else:
            settings.append("ssl_ca_file='{}.crt'".format(ca_name))

        crl_dir = params.get("crldir")
        if crl_dir is not None:
            settings.append("ssl_crl_dir='{}'".format(crl_dir))

        # Every setting, including the last one, ends with a newline.
        return "".join(line + "\n" for line in settings)

    def get_library(self) -> str:
        """Return the SSL library name, "OpenSSL"."""
        return self._library

    def library_is_libressl(self) -> bool:
        """Detect whether the SSL library is LibreSSL.

        Reports LibreSSL when pg_config lacks the HAVE_SSL_CTX_SET_CERT_CB
        define, the same (admittedly bogus) heuristic as the Perl backend.
        """
        has_cert_cb = pypg.check_pg_config("#define HAVE_SSL_CTX_SET_CERT_CB 1")
        return not has_cert_cb

    @staticmethod
    def _copy_files(orig_glob: str, dest: pathlib.Path) -> None:
        """Copy files matching a glob (relative to the ssl/ dir) into dest."""
        matches = glob.glob(str(_SSL_FILES_DIR / orig_glob))
        for source in matches:
            shutil.copyfile(source, dest / os.path.basename(source))


def cafile_is_empty(cafile) -> bool:
    """Return True when the CA file name is the empty string."""
    return cafile == ""
class SSLServer:
    """Mirror of SSL::Server for the OpenSSL backend.

    Provides the configure_test_server_for_ssl / switch_server_cert / sslkey
    helpers used by the ported SSL tests; the node to act on is passed to
    each call rather than stored.
    """

    def __init__(self, key_tempdir: pathlib.Path):
        self._backend = OpenSSLBackend(key_tempdir)

    def sslkey(self, keyfile: str) -> str:
        """Return an ' sslkey=<path>' connstr fragment for keyfile."""
        return self._backend.get_sslkey(keyfile)

    def ssl_library(self) -> str:
        """Return the SSL backend library name."""
        return self._backend.get_library()

    def is_libressl(self) -> bool:
        """Return True if the SSL backend is LibreSSL."""
        return self._backend.library_is_libressl()

    def configure_test_server_for_ssl(
        self, node, serverhost, servercidr, authmethod, **params
    ):
        """Configure node for SSL connections.

        Steps, in order: create the test users and databases, optionally set
        passwords (``password`` + ``password_enc``) and create ``extensions``,
        append the logging/listen settings and the include of sslconfig.conf,
        create an empty sslconfig.conf, install the cert fixtures, restart,
        and finally write pg_hba.conf/pg_ident.conf for SSL.
        """
        data_dir = pathlib.Path(node.datadir)

        for role in _USERS:
            node.psql("postgres", "-c", "CREATE USER {}".format(role))
        for database in _DATABASES:
            node.psql("postgres", "-c", "CREATE DATABASE {}".format(database))

        self._set_passwords(node, params)
        self._create_extensions(node, params)

        base_settings = "\n".join(
            (
                "fsync=off",
                "log_connections=all",
                "log_hostname=on",
                "listen_addresses='{}'".format(serverhost),
                "log_statement=all",
            )
        )
        node.append_conf(base_settings)
        node.append_conf("include 'sslconfig.conf'")

        # SSL configuration is appended here by switch_server_cert.
        (data_dir / "sslconfig.conf").write_text("", encoding="utf-8")

        self._backend.init(data_dir)

        # Restart to load the new listen_addresses.
        node.restart()

        # pg_hba must change after restart because hostssl requires ssl=on.
        self._configure_hba_for_ssl(node, servercidr, authmethod)

    def switch_server_cert(self, node, **params):
        """Rewrite sslconfig.conf to use the given cert/key/CA/CRL set.

        Mirrors SSL::Server->switch_server_cert: removes sslconfig.conf, then
        appends ssl=on, the backend cert selection, ssl_groups and
        ssl_tls13_ciphers settings and, when given, the passphrase command and
        its reload flag. The node is restarted unless ``restart='no'``.
        """
        conf_name = "sslconfig.conf"
        (pathlib.Path(node.datadir) / conf_name).unlink()

        fragments = [
            "ssl=on",
            self._backend.set_server_cert(params),
            "ssl_groups=prime256v1:secp521r1",
            "ssl_tls13_ciphers=TLS_AES_256_GCM_SHA384:TLS_AES_128_GCM_SHA256",
        ]
        if "passphrase_cmd" in params:
            fragments.append(
                "ssl_passphrase_command='{}'".format(params["passphrase_cmd"])
            )
        if "passphrase_cmd_reload" in params:
            fragments.append(
                "ssl_passphrase_command_supports_reload='{}'".format(
                    params["passphrase_cmd_reload"]
                )
            )
        # One append_conf call per fragment, in the order listed above.
        for fragment in fragments:
            node.append_conf(fragment, conf_name)

        if params.get("restart") != "no":
            node.restart()

    @staticmethod
    def _set_passwords(node, params):
        """Set per-user passwords when configure params request them.

        Raises:
            ValueError: If ``password`` is given without ``password_enc``.
        """
        if "password" not in params:
            return
        if "password_enc" not in params:
            raise ValueError("password_enc must be set when password is set")

        secret = params["password"]
        requested_enc = params["password_enc"]
        # md5testuser always gets an md5 password regardless of the request.
        assignments = (
            ("ssltestuser", requested_enc),
            ("md5testuser", "md5"),
            ("anotheruser", requested_enc),
        )
        for role, encryption in assignments:
            node.psql(
                "postgres",
                "-c",
                "SET password_encryption='{}'; "
                "ALTER USER {} PASSWORD '{}';".format(encryption, role, secret),
            )

    @staticmethod
    def _create_extensions(node, params):
        """Create requested extensions in every test database."""
        wanted = params.get("extensions", [])
        for extension in wanted:
            statement = "CREATE EXTENSION {} CASCADE;".format(extension)
            for database in _DATABASES:
                node.psql(database, "-c", statement)

    @staticmethod
    def _configure_hba_for_ssl(node, servercidr, authmethod):
        """Write the SSL pg_hba.conf and the DN/CN ident maps."""
        data_dir = pathlib.Path(node.datadir)

        hba_template = "\n".join(
            (
                "# TYPE DATABASE USER ADDRESS METHOD OPTIONS",
                "hostssl trustdb md5testuser {cidr} md5",
                "hostssl trustdb all {cidr} {auth}",
                "hostssl verifydb ssltestuser {cidr} {auth} clientcert=verify-full",
                "hostssl verifydb anotheruser {cidr} {auth} clientcert=verify-full",
                "hostssl verifydb yetanotheruser {cidr} {auth} clientcert=verify-ca",
                "hostssl certdb all {cidr} cert",
                "hostssl certdb_dn all {cidr} cert clientname=DN map=dn",
                "hostssl certdb_dn_re all {cidr} cert clientname=DN map=dnre",
                "hostssl certdb_cn all {cidr} cert clientname=CN map=cn",
            )
        )
        (data_dir / "pg_hba.conf").unlink()
        node.append_conf(
            hba_template.format(cidr=servercidr, auth=authmethod), "pg_hba.conf"
        )

        ident_lines = (
            "# MAPNAME SYSTEM-USERNAME PG-USERNAME",
            'dn "CN=ssltestuser-dn,OU=Testing,OU=Engineering,O=PGDG" ssltestuser',
            'dnre "/^.*OU=Testing,.*$" ssltestuser',
            "cn ssltestuser-dn ssltestuser",
        )
        (data_dir / "pg_ident.conf").unlink()
        node.append_conf("\n".join(ident_lines), "pg_ident.conf")


def stat_is_world_readable(path) -> bool:
    """Return True if any group/other read or write bit is set on path.

    The permission bits are tested against the mask 0o066, so despite the
    name a file that is only group- or world-writable is reported as well.
    """
    permission_bits = stat.S_IMODE(os.stat(path).st_mode)
    return (permission_bits & 0o066) != 0


def ssl_file_path(name: str) -> pathlib.Path:
    """Return the path to a fixture under the ssl/ directory (e.g. client.key)."""
    return _SSL_FILES_DIR.joinpath(name)
openssl.full_path() : '', + }, 'tests': [ 'pyt/test_client.py', 'pyt/test_server.py', + 'pyt/test_001_ssltests.py', + 'pyt/test_002_scram.py', + 'pyt/test_003_sslinfo.py', + 'pyt/test_004_sni.py', ], }, } diff --git a/src/test/ssl/pyt/conftest.py b/src/test/ssl/pyt/conftest.py index d121724800bfd..742b9d5c02277 100644 --- a/src/test/ssl/pyt/conftest.py +++ b/src/test/ssl/pyt/conftest.py @@ -128,10 +128,12 @@ def _tofile(self, data: bytes, *, suffix) -> str: return _Certs() -@pytest.fixture(scope="module", autouse=True) +@pytest.fixture(scope="module") def ssl_setup(pg_server_module, certs, datadir): """ - Sets up required server settings for all tests in this module. + Sets up required server settings for tests that use the shared module + server (Jacob Champion's client/server-split tests). Parity ports that + create their own server via create_pg do not request this fixture. """ try: with pg_server_module.restarting() as s: diff --git a/src/test/ssl/pyt/test_001_ssltests.py b/src/test/ssl/pyt/test_001_ssltests.py new file mode 100644 index 0000000000000..ccb97ee68748b --- /dev/null +++ b/src/test/ssl/pyt/test_001_ssltests.py @@ -0,0 +1,970 @@ +# Copyright (c) 2021-2026, PostgreSQL Global Development Group + +# pylint: disable=line-too-long +"""Port of src/test/ssl/t/001_ssltests.pl. + +The core SSL regression suite: password-protected server keys and reload +behavior, SSL protocol bounds, ssl_groups parsing, client-side sslmode/root +cert/CRL/sslcertmode handling, host name verification against CN and Subject +Alternative Names (including IP and IPv6 SANs), system trusted roots, and +server-side client-certificate authorization (cert/DN/CN mapping, revoked +certs, intermediate CAs and server-side CRL directories). +""" + +import os +import platform + +import pytest + +import pypg +from pypg.ssl_server import SSLServer, stat_is_world_readable + +# This suite opens up local TCP ports and is hidden behind PG_TEST_EXTRA=ssl. 
pytestmark = pypg.require_test_extras("ssl")

# Address the server listens on, and the matching CIDR used in pg_hba.conf.
SERVERHOSTADDR = "127.0.0.1"
SERVERHOSTCIDR = "127.0.0.1/32"

# Every client-side SSL file option is pointed at a non-existent "invalid"
# value; individual checks override the ones they need by appending to the
# connection string.
DEFAULT_SSL_CONNSTR = (
    "sslkey=invalid sslcert=invalid sslrootcert=invalid "
    "sslcrl=invalid sslcrldir=invalid"
)


def test_001_ssltests(create_pg, tmp_path):
    """Run the full SSL regression suite against an OpenSSL-enabled server.

    Skipped unless the ``with_ssl`` environment variable is ``openssl``. One
    server is created and shared by all sections, which run in a fixed order
    because each one leaves the server configuration in the state the next
    one starts from.
    """
    if os.environ.get("with_ssl") != "openssl":
        pytest.skip("OpenSSL not supported by this build")

    # tmp_path receives the permission-tightened copies of the client keys.
    ssl_server = SSLServer(tmp_path)
    libressl = ssl_server.is_libressl()
    supports_sslcertmode_require = pypg.check_pg_config(
        "#define HAVE_SSL_CTX_SET_CERT_CB 1"
    )
    has_inet_pton = pypg.check_pg_config("#define HAVE_INET_PTON 1")

    node = create_pg("primary", hostaddr=SERVERHOSTADDR, start=False)
    # Needed to allow connect_fails to inspect postmaster log.
    node.append_conf("log_min_messages = debug2")
    node.start()

    assert (
        node.safe_psql("SHOW ssl_library") == ssl_server.ssl_library()
    ), "ssl_library parameter"
    # Captured before SSL is configured; consumed by _test_password_keys.
    exec_backend = node.safe_psql("SHOW debug_exec_backend")

    ssl_server.configure_test_server_for_ssl(
        node, SERVERHOSTADDR, SERVERHOSTCIDR, "trust"
    )

    _test_password_keys(node, ssl_server, exec_backend)
    _test_protocol_and_groups(node, ssl_server)

    common_connstr = (
        "{} user=ssltestuser dbname=trustdb hostaddr={} "
        "host=common-name.pg-ssltest.test".format(DEFAULT_SSL_CONNSTR, SERVERHOSTADDR)
    )
    # Rewrites sslconfig.conf from scratch and restarts the server.
    ssl_server.switch_server_cert(node, certfile="server-cn-only")

    _test_keylogging(node, common_connstr, libressl)
    _test_root_certs(node, common_connstr, supports_sslcertmode_require)
    _test_crls(node, common_connstr)
    _test_hostname_verification(node, ssl_server, has_inet_pton)
    _test_cn_and_san(node, ssl_server, has_inet_pton)
    _test_system_roots(node, ssl_server, libressl)
    _test_server_crl(node, ssl_server)
    _test_protocol_versions(node)
    _test_cert_authorization(node, ssl_server, supports_sslcertmode_require)


def _test_password_keys(node, ssl_server, exec_backend):
    """Password-protected server keys and passphrase reload behavior.

    Args:
        node: The server under test; it is restarted and reloaded repeatedly.
        ssl_server: SSLServer helper used to switch the server certificate.
        exec_backend: Output of ``SHOW debug_exec_backend``; when it contains
            "on" the first connection attempt is expected to fail.
    """
    # Wrong passphrase: server must not start at all.
    ssl_server.switch_server_cert(
        node,
        certfile="server-cn-only",
        cafile="root+client_ca",
        keyfile="server-password",
        passphrase_cmd="echo wrongpassword",
        restart="no",
    )
    assert (
        node.restart(fail_ok=True, log_like=[r"could not load private key file"])
        is False
    ), "restart fails with password-protected key file with wrong password"

    # Correct passphrase but no reload support.
    ssl_server.switch_server_cert(
        node,
        certfile="server-cn-only",
        cafile="root+client_ca",
        keyfile="server-password",
        passphrase_cmd="echo secret1",
        passphrase_cmd_reload="off",
        restart="no",
    )
    assert (
        node.restart(fail_ok=True, log_unlike=[r"could not load private key file"])
        is True
    ), "restart succeeds with password-protected key file"

    common_connstr = (
        "{} user=ssltestuser dbname=trustdb hostaddr={} "
        "host=common-name.pg-ssltest.test".format(DEFAULT_SSL_CONNSTR, SERVERHOSTADDR)
    )
    # Connection string reused by every connection attempt below.
    require = "{} sslrootcert=ssl/root+server_ca.crt sslmode=require".format(
        common_connstr
    )
    if "on" in exec_backend:
        node.connect_fails(
            require,
            "connect with correct server CA cert file sslmode=require",
            expected_stderr=r"server does not support SSL",
        )
    else:
        node.connect_ok(
            require, "connect with correct server CA cert file sslmode=require"
        )

    # Reloading should fail since we cannot execute the passphrase command.
    node.reload()
    # Remember where this message was found so that the later log inspection
    # only looks at what was written afterwards.
    log_start = node.wait_for_log(
        r"cannot be reloaded because it requires a passphrase"
    )

    # Correct passphrase that can be reloaded.
    ssl_server.switch_server_cert(
        node,
        certfile="server-cn-only",
        cafile="root+client_ca",
        keyfile="server-password",
        passphrase_cmd="echo secret1",
        passphrase_cmd_reload="on",
        restart="no",
    )
    assert (
        node.restart(fail_ok=True, log_unlike=[r"could not load private key file"])
        is True
    ), "restart succeeds with password-protected key file"
    node.connect_ok(require, "connect with correct server CA cert file sslmode=require")

    # Reloading should execute the reload command and reload the key.
    node.reload()
    log_start = node.wait_for_log(r"reloading configuration files", log_start)
    log = pypg.slurp_file(node.log, log_start)
    assert (
        "cannot be reloaded because it requires a passphrase" not in log
    ), "passphrase could reload private key"
    node.connect_ok(require, "connect with correct server CA cert file sslmode=require")
+ node.append_conf("ssl_groups='bad:value'", "sslconfig.conf") + assert node.restart(fail_ok=True) is False, "restart fails with incorrect groups" + assert not node.log_matches(r"no SSL error reported"), "error message translated" + node.append_conf("ssl_groups='prime256v1'", "ssl_config.conf") + node.restart(fail_ok=True) + + +def _test_keylogging(node, common_connstr, libressl): + """sslkeylogfile creates a non-world-readable file, errors are non-fatal.""" + if libressl: + pytest.skip("Keylogging is not supported with LibreSSL") + + tempdir = node.basedir + keylog = tempdir / "key.txt" + node.connect_ok( + "{} sslrootcert=ssl/root+server_ca.crt sslkeylogfile={} sslmode=require".format( + common_connstr, keylog + ), + "connect with server root cert and sslkeylogfile={}".format(keylog), + ) + assert keylog.is_file(), "keylog file exists at: {}".format(keylog) + + if platform.system() == "Windows": + pytest.skip("Permissions check not enforced on Windows") + + assert not stat_is_world_readable(keylog), "keylog file is not world readable" + + node.connect_ok( + "{} sslrootcert=ssl/root+server_ca.crt sslkeylogfile={}/invalid/key.txt " + "sslmode=require".format(common_connstr, tempdir), + "connect with server root cert and incorrect sslkeylogfile path", + expected_stderr=r"could not open", + ) + + +def _test_root_certs(node, common_connstr, supports_sslcertmode_require): + """sslmode/root-cert combinations and sslcertmode without a client cert.""" + node.connect_fails( + "{} sslmode=disable".format(common_connstr), + "server doesn't accept non-SSL connections", + expected_stderr=r"no pg_hba.conf entry", + ) + + node.connect_ok( + "{} sslrootcert=invalid sslmode=require".format(common_connstr), + "connect without server root cert sslmode=require", + ) + for mode in ("verify-ca", "verify-full"): + node.connect_fails( + "{} sslrootcert=invalid sslmode={}".format(common_connstr, mode), + "connect without server root cert sslmode={}".format(mode), + expected_stderr=r'root 
certificate file "invalid" does not exist', + ) + + for mode in ("require", "verify-ca", "verify-full"): + node.connect_fails( + "{} sslrootcert=ssl/client_ca.crt sslmode={}".format(common_connstr, mode), + "connect with wrong server root cert sslmode={}".format(mode), + expected_stderr=r"SSL error: certificate verify failed", + ) + + node.connect_fails( + "{} sslrootcert=ssl/server_ca.crt sslmode=verify-ca".format(common_connstr), + "connect with server CA cert, without root CA", + expected_stderr=r"SSL error: certificate verify failed", + ) + + for mode in ("require", "verify-ca", "verify-full"): + node.connect_ok( + "{} sslrootcert=ssl/root+server_ca.crt sslmode={}".format( + common_connstr, mode + ), + "connect with correct server CA cert file sslmode={}".format(mode), + ) + + node.connect_ok( + "{} sslrootcert=ssl/both-cas-1.crt sslmode=verify-ca".format(common_connstr), + "cert root file that contains two certificates, order 1", + ) + node.connect_ok( + "{} sslrootcert=ssl/both-cas-2.crt sslmode=verify-ca".format(common_connstr), + "cert root file that contains two certificates, order 2", + ) + + node.connect_ok( + "{} sslrootcert=ssl/root+server_ca.crt sslmode=require " + "sslcertmode=disable".format(common_connstr), + "connect with sslcertmode=disable", + ) + node.connect_ok( + "{} sslrootcert=ssl/root+server_ca.crt sslmode=require " + "sslcertmode=allow".format(common_connstr), + "connect with sslcertmode=allow", + ) + node.connect_fails( + "{} sslrootcert=ssl/root+server_ca.crt sslmode=require " + "sslcertmode=require".format(common_connstr), + "connect with sslcertmode=require fails without a client certificate", + expected_stderr=( + r"server accepted connection without a valid SSL certificate" + if supports_sslcertmode_require + else r'sslcertmode value "require" is not supported' + ), + ) + + +def _test_crls(node, common_connstr): + """Client-side sslcrl / sslcrldir handling.""" + node.connect_ok( + "{} sslrootcert=ssl/root+server_ca.crt 
sslmode=verify-ca sslcrl=invalid".format( + common_connstr + ), + "sslcrl option with invalid file name", + ) + node.connect_fails( + "{} sslrootcert=ssl/root+server_ca.crt sslmode=verify-ca " + "sslcrl=ssl/client.crl".format(common_connstr), + "CRL belonging to a different CA", + expected_stderr=r"SSL error: certificate verify failed", + ) + node.connect_fails( + "{} sslcrl='' sslrootcert=ssl/root+server_ca.crt sslmode=verify-ca " + "sslcrldir=ssl/client-crldir".format(common_connstr), + "directory CRL belonging to a different CA", + expected_stderr=r"SSL error: certificate verify failed", + ) + node.connect_ok( + "{} sslrootcert=ssl/root+server_ca.crt sslmode=verify-ca " + "sslcrl=ssl/root+server.crl".format(common_connstr), + "CRL with a non-revoked cert", + ) + node.connect_ok( + "{} sslrootcert=ssl/root+server_ca.crt sslmode=verify-ca " + "sslcrldir=ssl/root+server-crldir".format(common_connstr), + "directory CRL with a non-revoked cert", + ) + + +def _verify_full_base(): + """Common connstr for the host name verification tests (verify-full).""" + return ( + "{} user=ssltestuser dbname=trustdb sslrootcert=ssl/root+server_ca.crt " + "hostaddr={}".format(DEFAULT_SSL_CONNSTR, SERVERHOSTADDR) + ) + + +def _test_hostname_verification(node, ssl_server, has_inet_pton): + """Host name vs. 
server certificate matching for CN, IP CN and SANs.""" + base = _verify_full_base() + node.connect_ok( + "{} sslmode=require host=wronghost.test".format(base), + "mismatch between host name and server certificate sslmode=require", + ) + node.connect_ok( + "{} sslmode=verify-ca host=wronghost.test".format(base), + "mismatch between host name and server certificate sslmode=verify-ca", + ) + node.connect_fails( + "{} sslmode=verify-full host=wronghost.test".format(base), + "mismatch between host name and server certificate sslmode=verify-full", + expected_stderr=r'server certificate for "common-name.pg-ssltest.test" does not match host name "wronghost.test"', + ) + + ssl_server.switch_server_cert(node, certfile="server-ip-cn-only") + vf = "{} sslmode=verify-full".format(base) + node.connect_ok( + "{} host=192.0.2.1 sslsni=0".format(vf), "IP address in the Common Name" + ) + node.connect_fails( + "{} host=192.000.002.001 sslsni=0".format(vf), + "mismatch between host name and server certificate IP address", + expected_stderr=r'server certificate for "192.0.2.1" does not match host name "192.000.002.001"', + ) + + ssl_server.switch_server_cert(node, certfile="server-ip-in-dnsname") + node.connect_ok("{} host=192.0.2.1 sslsni=0".format(vf), "IP address in a dNSName") + + _test_san(node, ssl_server, vf, has_inet_pton) + + +def _test_san(node, ssl_server, vf, has_inet_pton): + """X.509 Subject Alternative Name matching (DNS, wildcard, IP, IPv6).""" + ssl_server.switch_server_cert(node, certfile="server-multiple-alt-names") + node.connect_ok( + "{} host=dns1.alt-name.pg-ssltest.test".format(vf), + "host name matching with X.509 Subject Alternative Names 1", + ) + node.connect_ok( + "{} host=dns2.alt-name.pg-ssltest.test".format(vf), + "host name matching with X.509 Subject Alternative Names 2", + ) + node.connect_ok( + "{} host=foo.wildcard.pg-ssltest.test".format(vf), + "host name matching with X.509 Subject Alternative Names wildcard", + ) + node.connect_fails( + "{} 
host=wronghost.alt-name.pg-ssltest.test".format(vf), + "host name not matching with X.509 Subject Alternative Names", + expected_stderr=r'server certificate for "dns1.alt-name.pg-ssltest.test" \(and 2 other names\) does not match host name "wronghost.alt-name.pg-ssltest.test"', + ) + node.connect_fails( + "{} host=deep.subdomain.wildcard.pg-ssltest.test".format(vf), + "host name not matching with X.509 Subject Alternative Names wildcard", + expected_stderr=r'server certificate for "dns1.alt-name.pg-ssltest.test" \(and 2 other names\) does not match host name "deep.subdomain.wildcard.pg-ssltest.test"', + ) + + ssl_server.switch_server_cert(node, certfile="server-single-alt-name") + node.connect_ok( + "{} host=single.alt-name.pg-ssltest.test".format(vf), + "host name matching with a single X.509 Subject Alternative Name", + ) + node.connect_fails( + "{} host=wronghost.alt-name.pg-ssltest.test".format(vf), + "host name not matching with a single X.509 Subject Alternative Name", + expected_stderr=r'server certificate for "single.alt-name.pg-ssltest.test" does not match host name "wronghost.alt-name.pg-ssltest.test"', + ) + node.connect_fails( + "{} host=deep.subdomain.wildcard.pg-ssltest.test".format(vf), + "host name not matching with a single X.509 Subject Alternative Name wildcard", + expected_stderr=r'server certificate for "single.alt-name.pg-ssltest.test" does not match host name "deep.subdomain.wildcard.pg-ssltest.test"', + ) + + if not has_inet_pton: + return + _test_ip_san(node, ssl_server, vf) + + +def _test_ip_san(node, ssl_server, vf): + """IP and IPv6 addresses in Subject Alternative Names.""" + ssl_server.switch_server_cert(node, certfile="server-ip-alt-names") + node.connect_ok( + "{} host=192.0.2.1".format(vf), + "host matching an IPv4 address (Subject Alternative Name 1)", + ) + node.connect_ok( + "{} host=192.000.002.001".format(vf), + "host matching an IPv4 address in alternate form (Subject Alternative Name 1)", + ) + node.connect_fails( + "{} 
host=192.0.2.2".format(vf), + "host not matching an IPv4 address (Subject Alternative Name 1)", + expected_stderr=r'server certificate for "192.0.2.1" \(and 1 other name\) does not match host name "192.0.2.2"', + ) + node.connect_ok( + "{} host=2001:DB8::1".format(vf), + "host matching an IPv6 address (Subject Alternative Name 2)", + ) + node.connect_ok( + "{} host=2001:db8:0:0:0:0:0:1".format(vf), + "host matching an IPv6 address in alternate form (Subject Alternative Name 2)", + ) + node.connect_ok( + "{} host=2001:db8::0.0.0.1".format(vf), + "host matching an IPv6 address in mixed form (Subject Alternative Name 2)", + ) + node.connect_fails( + "{} host=::1".format(vf), + "host not matching an IPv6 address (Subject Alternative Name 2)", + expected_stderr=r'server certificate for "192.0.2.1" \(and 1 other name\) does not match host name "::1"', + ) + node.connect_fails( + "{} host=2001:DB8::1/128".format(vf), + "IPv6 host with CIDR mask does not match", + expected_stderr=r'server certificate for "192.0.2.1" \(and 1 other name\) does not match host name "2001:DB8::1/128"', + ) + + +def _test_cn_and_san(node, ssl_server, has_inet_pton): + """CN-vs-SAN precedence rules (RFCs 2818/6125) and no-name certificates.""" + ssl_server.switch_server_cert(node, certfile="server-cn-and-alt-names") + vf = "{} sslmode=verify-full".format(_verify_full_base()) + node.connect_ok( + "{} host=dns1.alt-name.pg-ssltest.test".format(vf), + "certificate with both a CN and SANs 1", + ) + node.connect_ok( + "{} host=dns2.alt-name.pg-ssltest.test".format(vf), + "certificate with both a CN and SANs 2", + ) + node.connect_fails( + "{} host=common-name.pg-ssltest.test".format(vf), + "certificate with both a CN and SANs ignores CN", + expected_stderr=r'server certificate for "dns1.alt-name.pg-ssltest.test" \(and 1 other name\) does not match host name "common-name.pg-ssltest.test"', + ) + + if has_inet_pton: + _test_cn_and_ip_san(node, ssl_server, vf) + + ssl_server.switch_server_cert(node, 
def _test_no_names(node, ssl_server):
    """Connect to a server whose certificate has neither a CN nor SANs.

    Installs the "server-no-names" certificate on ``node`` through
    ``ssl_server`` and then expects:

    * sslmode=verify-ca to connect, and
    * sslmode=verify-full to fail with the "could not get server's host
      name" error.
    """
    ssl_server.switch_server_cert(node, certfile="server-no-names")

    # Reuse the shared verify-full prefix rather than repeating the literal;
    # the helper builds exactly the string this function used to spell out.
    base = _verify_full_base()

    node.connect_ok(
        "{} sslmode=verify-ca host=common-name.pg-ssltest.test".format(base),
        "server certificate without CN or SANs sslmode=verify-ca",
    )
    node.connect_fails(
        "{} sslmode=verify-full host=common-name.pg-ssltest.test".format(base),
        "server certificate without CN or SANs sslmode=verify-full",
        expected_stderr=r"could not get server's host name from server certificate",
    )
def _test_server_crl(node, ssl_server):
    """Check client CRL handling against the "server-revoked" certificate.

    The connection is expected to succeed without a client-side CRL and to
    fail once a CRL file or CRL directory is supplied.  Finally psql is run
    without a client certificate and its pg_stat_ssl row is matched against
    the expected CSV shape.
    """
    ssl_server.switch_server_cert(node, certfile="server-revoked")

    target = (
        "{} user=ssltestuser dbname=trustdb hostaddr={} "
        "host=common-name.pg-ssltest.test".format(DEFAULT_SSL_CONNSTR, SERVERHOSTADDR)
    )
    verify_ca = "sslrootcert=ssl/root+server_ca.crt sslmode=verify-ca"
    verify_failed = r"SSL error: certificate verify failed"

    node.connect_ok(
        "{} {}".format(target, verify_ca),
        "connects without client-side CRL",
    )
    node.connect_fails(
        "{} {} sslcrl=ssl/root+server.crl".format(target, verify_ca),
        "does not connect with client-side CRL file",
        expected_stderr=verify_failed,
    )
    node.connect_fails(
        "{} sslcrl='' {} sslcrldir=ssl/root+server-crldir".format(target, verify_ca),
        "does not connect with client-side CRL directory",
        expected_stderr=verify_failed,
    )

    # Query pg_stat_ssl for this backend; the three client-certificate
    # columns are expected to print as the configured null marker.
    psql_command = [
        "psql",
        "--no-psqlrc",
        "--no-align",
        "--field-separator",
        ",",
        "--pset",
        "null=_null_",
        "--dbname",
        "{} sslrootcert=invalid".format(target),
        "--command",
        "SELECT * FROM pg_stat_ssl WHERE pid = pg_backend_pid()",
    ]
    expected_output = (
        r"(?mx)^pid,ssl,version,cipher,bits,client_dn,client_serial,issuer_dn\r?\n"
        r"^\d+,t,TLSv[\d.]+,[\w-]+,\d+,_null_,_null_,_null_\r?$"
    )
    node.command_like(
        psql_command,
        expected_output,
        "pg_stat_ssl view without client certificate",
    )
node.connect_fails( + "{} user=ssltestuser sslcert=invalid".format(common), + "certificate authorization fails without client cert", + expected_stderr=r"connection requires a valid client certificate", + ) + node.connect_ok( + "{} user=ssltestuser sslcert=ssl/client.crt{}".format( + common, key("client.key") + ), + "certificate authorization succeeds with correct client cert in PEM format", + ) + node.connect_ok( + "{} user=ssltestuser sslcert=ssl/client.crt{}".format( + common, key("client-der.key") + ), + "certificate authorization succeeds with correct client cert in DER format", + ) + node.connect_ok( + "{} user=ssltestuser sslcert=ssl/client.crt{} sslpassword='dUmmyP^#+'".format( + common, key("client-encrypted-pem.key") + ), + "certificate authorization succeeds with correct client cert in encrypted PEM format", + ) + node.connect_ok( + "{} user=ssltestuser sslcert=ssl/client.crt{} sslpassword='dUmmyP^#+'".format( + common, key("client-encrypted-der.key") + ), + "certificate authorization succeeds with correct client cert in encrypted DER format", + ) + + if supports_sslcertmode_require: + node.connect_ok( + "{} user=ssltestuser sslcertmode=require sslcert=ssl/client.crt{}".format( + common, key("client.key") + ), + "certificate authorization succeeds with correct client cert and sslcertmode=require", + ) + node.connect_ok( + "{} user=ssltestuser sslcertmode=allow sslcert=ssl/client.crt{}".format( + common, key("client.key") + ), + "certificate authorization succeeds with correct client cert and sslcertmode=allow", + ) + node.connect_fails( + "{} user=ssltestuser sslcertmode=disable sslcert=ssl/client.crt{}".format( + common, key("client.key") + ), + "certificate authorization fails with correct client cert and sslcertmode=disable", + expected_stderr=r"connection requires a valid client certificate", + ) + node.connect_fails( + "{} user=ssltestuser sslcert=ssl/client.crt{} sslpassword='wrong'".format( + common, key("client-encrypted-pem.key") + ), + 
"certificate authorization fails with correct client cert and wrong password in encrypted PEM format", + expected_stderr=r'private key file ".*client-encrypted-pem\.key": bad decrypt', + ) + + _test_dn_cn_mapping(node, common, key) + _test_cert_failures(node, ssl_server, common, key, supports_sslcertmode_require) + + +def _test_dn_cn_mapping(node, common, key): + """DN/regex/CN ident mapping for certificate authentication.""" + node.connect_ok( + "{} user=ssltestuser sslcert=ssl/client-dn.crt{}".format( + common.replace("dbname=certdb", "dbname=certdb_dn"), key("client-dn.key") + ), + "certificate authorization succeeds with DN mapping", + log_like=[ + r'connection authenticated: identity="CN=ssltestuser-dn,OU=Testing,OU=Engineering,O=PGDG" method=cert' + ], + ) + node.connect_ok( + "{} user=ssltestuser sslcert=ssl/client-dn.crt{}".format( + common.replace("dbname=certdb", "dbname=certdb_dn_re"), + key("client-dn.key"), + ), + "certificate authorization succeeds with DN regex mapping", + ) + node.connect_ok( + "{} user=ssltestuser sslcert=ssl/client-dn.crt{}".format( + common.replace("dbname=certdb", "dbname=certdb_cn"), key("client-dn.key") + ), + "certificate authorization succeeds with CN mapping", + log_like=[ + r'connection authenticated: identity="CN=ssltestuser-dn,OU=Testing,OU=Engineering,O=PGDG" method=cert' + ], + ) + + +def _test_cert_failures(node, ssl_server, common, key, supports_sslcertmode_require): + """Wrong permissions, wrong user, revoked certs and verify-full/verify-ca HBA.""" + if platform.system() != "Windows": + node.connect_fails( + "{} user=ssltestuser sslcert=ssl/client.crt{}".format( + common, key("client_wrongperms.key") + ), + "certificate authorization fails because of file permissions", + expected_stderr=r'private key file ".*client_wrongperms\.key" has group or world access', + ) + + node.connect_fails( + "{} user=anotheruser sslcert=ssl/client.crt{}".format( + common, key("client.key") + ), + "certificate authorization fails with 
def _test_verify_full_ca(node, key):
    """Check the clientcert auth option on the verifydb database.

    All three attempts present the same client certificate and differ only
    in the connecting user.  ``key`` is called with a key file name and its
    result is appended to the connection string.
    """
    prefix = (
        "{} sslrootcert=ssl/root+server_ca.crt sslmode=require dbname=verifydb "
        "hostaddr={} host=localhost".format(DEFAULT_SSL_CONNSTR, SERVERHOSTADDR)
    )

    def as_user(role):
        # key() is invoked once per attempt, as each original call did.
        return "{} user={} sslcert=ssl/client.crt{}".format(
            prefix, role, key("client.key")
        )

    # User name equals the certificate's Common Name.
    node.connect_ok(
        as_user("ssltestuser"),
        "auth_option clientcert=verify-full succeeds with matching username and Common Name",
        log_like=[r'connection authenticated: user="ssltestuser" method=trust'],
    )

    # User name differs from the Common Name under verify-full.
    node.connect_fails(
        as_user("anotheruser"),
        "auth_option clientcert=verify-full fails with mismatching username and Common Name",
        expected_stderr=r'FATAL: .* "trust" authentication failed for user "anotheruser"',
        log_unlike=[r"connection authenticated:"],
    )

    # User name differs from the Common Name under verify-ca.
    node.connect_ok(
        as_user("yetanotheruser"),
        "auth_option clientcert=verify-ca succeeds with mismatching username and Common Name",
        log_like=[r'connection authenticated: user="yetanotheruser" method=trust'],
    )
def _test_server_crl_dir(node, ssl_server, key):
    """Check that a server-side CRL directory rejects revoked client certs.

    Two revoked certificates are tried, one with an ASCII subject and one
    with a UTF-8 subject.  Both connections are expected to fail, and the
    server log is expected to show the revocation and the certificate data.
    """
    connstr = (
        "{} user=ssltestuser dbname=certdb sslrootcert=ssl/root+server_ca.crt "
        "hostaddr={} host=localhost sslmode=require".format(
            DEFAULT_SSL_CONNSTR, SERVERHOSTADDR
        )
    )
    ssl_server.switch_server_cert(
        node, certfile="server-cn-only", crldir="root+client-crldir"
    )

    # (certificate/key stem, test description, expected "failed certificate
    # data" log line)
    revoked_certs = (
        (
            "client-revoked",
            "certificate authorization fails with revoked client cert with server-side CRL directory",
            r'Failed certificate data \(unverified\): subject "/CN=ssltestuser", serial number \d+, issuer "/CN=Test CA for PostgreSQL SSL regression test client certs"',
        ),
        (
            "client-revoked-utf8",
            "certificate authorization fails with revoked UTF-8 client cert with server-side CRL directory",
            r'Failed certificate data \(unverified\): subject "/CN=\\xce\\x9f\\xce\\xb4\\xcf\\x85\\xcf\\x83\\xcf\\x83\\xce\\xad\\xce\\xb1\\xcf\\x82", serial number \d+, issuer "/CN=Test CA for PostgreSQL SSL regression test client certs"',
        ),
    )
    for stem, description, cert_data_line in revoked_certs:
        node.connect_fails(
            "{} sslcert=ssl/{}.crt{}".format(connstr, stem, key(stem + ".key")),
            description,
            expected_stderr=r"SSL error: (ssl[a-z0-9/]*|tls) alert certificate revoked",
            log_like=[
                r"Client certificate verification failed at depth 0: certificate revoked",
                cert_data_line,
            ],
        )
def test_002_scram(create_pg, tmp_path):
    """Run the SCRAM-over-SSL checks against a freshly created server.

    The test is skipped unless the ``with_ssl`` environment variable is
    "openssl".  A primary node is created and configured for SSL with
    scram-sha-256 passwords.  The checks then run with PGPASSWORD set, and
    its previous value is restored afterwards.
    """
    if os.environ.get("with_ssl") != "openssl":
        pytest.skip("OpenSSL not supported by this build")

    ssl_server = SSLServer(tmp_path)
    libressl = ssl_server.is_libressl()
    has_signature_info = pypg.check_pg_config(
        "#define HAVE_X509_GET_SIGNATURE_INFO 1"
    )
    # As of 5/2025, LibreSSL doesn't actually work for RSA-PSS certs.
    supports_rsapss_certs = False if libressl else has_signature_info

    node = create_pg("primary", hostaddr=SERVERHOSTADDR, start=True)
    # The MD5-specific checks depend on whether md5() runs successfully on
    # this server.
    md5_probe = node.psql_capture("select md5('')")
    md5_works = md5_probe.rc == 0

    ssl_server.configure_test_server_for_ssl(
        node,
        SERVERHOSTADDR,
        SERVERHOSTCIDR,
        "scram-sha-256",
        password="pass",
        password_enc="scram-sha-256",
    )
    ssl_server.switch_server_cert(node, certfile="server-cn-only")

    # Supply the password via the environment for the duration of the checks.
    previous_password = os.environ.get("PGPASSWORD")
    os.environ["PGPASSWORD"] = "pass"
    try:
        _run_scram_tests(node, ssl_server, tmp_path, md5_works, supports_rsapss_certs)
    finally:
        if previous_password is not None:
            os.environ["PGPASSWORD"] = previous_password
        else:
            os.environ.pop("PGPASSWORD", None)
md5_works: + node.connect_fails( + "{} user=md5testuser channel_binding=require".format(common_connstr), + "MD5 with SSL and channel_binding=require", + expected_stderr=r"channel binding required but not supported by server's authentication request", + ) + + +def _test_cert_auth(node, tmp_path): + """cert auth and channel_binding, plus clientcert=verify-full.""" + # A unique client key copy, since ssl/client.key may be used elsewhere. + client_tmp_key = tmp_path / "client_scram.key" + shutil.copyfile(ssl_file_path("client.key"), client_tmp_key) + os.chmod(client_tmp_key, 0o600) + + node.connect_fails( + "sslcert=ssl/client.crt sslkey={key} sslrootcert=invalid hostaddr={addr} " + "host=localhost dbname=certdb user=ssltestuser channel_binding=require".format( + key=client_tmp_key, addr=SERVERHOSTADDR + ), + "Cert authentication and channel_binding=require", + expected_stderr=r"channel binding required, but server authenticated client without channel binding", + ) + + node.connect_ok( + "sslcert=ssl/client.crt sslkey={key} sslrootcert=invalid hostaddr={addr} " + "host=localhost dbname=verifydb user=ssltestuser".format( + key=client_tmp_key, addr=SERVERHOSTADDR + ), + "SCRAM with clientcert=verify-full", + log_like=[ + r'connection authenticated: identity="ssltestuser" method=scram-sha-256' + ], + ) + + +def _test_require_auth(node, common_connstr, md5_works): + """channel_binding works independently of require_auth.""" + node.connect_ok( + "{} user=ssltestuser channel_binding=disable " + "require_auth=scram-sha-256".format(common_connstr), + "SCRAM with SSL, channel_binding=disable, and require_auth=scram-sha-256", + ) + + if md5_works: + node.connect_fails( + "{} user=md5testuser require_auth=md5 channel_binding=require".format( + common_connstr + ), + "channel_binding can fail even when require_auth succeeds", + expected_stderr=r"channel binding required but not supported by server's authentication request", + ) + + node.connect_ok( + "{} user=ssltestuser 
channel_binding=require " + "require_auth=scram-sha-256".format(common_connstr), + "SCRAM with SSL, channel_binding=require, and require_auth=scram-sha-256", + ) + + +def _test_rsapss(node, ssl_server, common_connstr, supports_rsapss_certs): + """A server certificate using the RSA-PSS algorithm (bug #17760).""" + if not supports_rsapss_certs: + return + ssl_server.switch_server_cert(node, certfile="server-rsapss") + node.connect_ok( + "{} user=ssltestuser channel_binding=require".format(common_connstr), + "SCRAM with SSL and channel_binding=require, server certificate uses 'rsassaPss'", + log_like=[ + r'connection authenticated: identity="ssltestuser" method=scram-sha-256' + ], + ) diff --git a/src/test/ssl/pyt/test_003_sslinfo.py b/src/test/ssl/pyt/test_003_sslinfo.py new file mode 100644 index 0000000000000..44ef8bdab604b --- /dev/null +++ b/src/test/ssl/pyt/test_003_sslinfo.py @@ -0,0 +1,181 @@ +# Copyright (c) 2021-2026, PostgreSQL Global Development Group + +# pylint: disable=line-too-long +"""Port of src/test/ssl/t/003_sslinfo.pl. + +Exercises the sslinfo extension over a TLS connection: ssl_is_used, +ssl_version, ssl_cipher, ssl_client_cert_present, ssl_client_serial, +ssl_client_dn_field, ssl_issuer_dn, ssl_issuer_field and ssl_extension_info, +cross-checked against pg_stat_ssl, plus sslcertmode handling. +""" + +import os + +import pytest + +import pypg +from pypg.ssl_server import SSLServer + +# This suite opens up local TCP ports and is hidden behind PG_TEST_EXTRA=ssl. +pytestmark = pypg.require_test_extras("ssl") + +SERVERHOSTADDR = "127.0.0.1" +SERVERHOSTCIDR = "127.0.0.1/32" + +# Defaults that protect against any ~/.postgresql certificate/key files. 
+DEFAULT_SSL_CONNSTR = ( + "sslkey=invalid sslcert=invalid sslrootcert=invalid " + "sslcrl=invalid sslcrldir=invalid" +) + + +def test_003_sslinfo(create_pg, tmp_path): + """sslinfo functions report the expected TLS/cert details via pg_stat_ssl.""" + if os.environ.get("with_ssl") != "openssl": + pytest.skip("OpenSSL not supported by this build") + + ssl_server = SSLServer(tmp_path) + supports_sslcertmode_require = pypg.check_pg_config( + "#define HAVE_SSL_CTX_SET_CERT_CB 1" + ) + + node = create_pg("primary", hostaddr=SERVERHOSTADDR, start=True) + ssl_server.configure_test_server_for_ssl( + node, SERVERHOSTADDR, SERVERHOSTCIDR, "trust", extensions=["sslinfo"] + ) + # server-revoked is reused as the server cert as in the 001 test; no CRLs + # are exercised here. + ssl_server.switch_server_cert(node, certfile="server-revoked") + + common_connstr = ( + "{default} sslrootcert=ssl/root+server_ca.crt sslmode=require dbname=certdb " + "hostaddr={addr} host=localhost user=ssltestuser sslcert=ssl/client_ext.crt{key}".format( + default=DEFAULT_SSL_CONNSTR, + addr=SERVERHOSTADDR, + key=ssl_server.sslkey("client_ext.key"), + ) + ) + + node.connect_ok( + common_connstr, + "certificate authorization succeeds with correct client cert in PEM format", + ) + + _test_with_cert(node, common_connstr) + _test_without_cert(node) + _test_sslcertmode(node, common_connstr, supports_sslcertmode_require) + + +def _test_with_cert(node, common_connstr): + """sslinfo functions for a connection that presents a client certificate.""" + assert ( + node.safe_psql("SELECT ssl_is_used();", connstr=common_connstr) == "t" + ), "ssl_is_used() for TLS connection" + + assert ( + node.safe_psql( + "SELECT ssl_version();", + connstr=common_connstr + + " ssl_min_protocol_version=TLSv1.2 ssl_max_protocol_version=TLSv1.2", + ) + == "TLSv1.2" + ), "ssl_version() correctly returning TLS protocol" + + assert ( + node.safe_psql( + "SELECT ssl_cipher() = cipher FROM pg_stat_ssl WHERE pid = pg_backend_pid();", + 
connstr=common_connstr, + ) + == "t" + ), "ssl_cipher() compared with pg_stat_ssl" + + assert ( + node.safe_psql("SELECT ssl_client_cert_present();", connstr=common_connstr) + == "t" + ), "ssl_client_cert_present() for connection with cert" + + assert ( + node.safe_psql( + "SELECT ssl_client_serial() = client_serial FROM pg_stat_ssl WHERE pid = pg_backend_pid();", + connstr=common_connstr, + ) + == "t" + ), "ssl_client_serial() compared with pg_stat_ssl" + + # Must not use safe_psql since we expect an error here (exit code 3). + result = node.psql_capture( + "SELECT ssl_client_dn_field('invalid');", connstr=common_connstr + ) + assert result.rc == 3, "ssl_client_dn_field() for an invalid field" + + assert ( + node.safe_psql( + "SELECT '/CN=' || ssl_client_dn_field('commonName') = client_dn FROM pg_stat_ssl WHERE pid = pg_backend_pid();", + connstr=common_connstr, + ) + == "t" + ), "ssl_client_dn_field() for commonName" + + assert ( + node.safe_psql( + "SELECT ssl_issuer_dn() = issuer_dn FROM pg_stat_ssl WHERE pid = pg_backend_pid();", + connstr=common_connstr, + ) + == "t" + ), "ssl_issuer_dn() for connection with cert" + + assert ( + node.safe_psql( + "SELECT '/CN=' || ssl_issuer_field('commonName') = issuer_dn FROM pg_stat_ssl WHERE pid = pg_backend_pid();", + connstr=common_connstr, + ) + == "t" + ), "ssl_issuer_field() for commonName" + + assert ( + node.safe_psql( + "SELECT value, critical FROM ssl_extension_info() WHERE name = 'basicConstraints';", + connstr=common_connstr, + ) + == "CA:FALSE|t" + ), "extract extension from cert" + + +def _test_without_cert(node): + """sslinfo functions for a TLS connection that presents no client cert.""" + trust_connstr = ( + "{default} sslrootcert=ssl/root+server_ca.crt sslmode=require " + "dbname=trustdb hostaddr={addr} user=ssltestuser host=localhost".format( + default=DEFAULT_SSL_CONNSTR, addr=SERVERHOSTADDR + ) + ) + + assert ( + node.safe_psql("SELECT ssl_client_cert_present();", connstr=trust_connstr) + == "f" + ), 
"ssl_client_cert_present() for connection without cert" + + assert ( + node.safe_psql( + "SELECT ssl_client_dn_field('commonName');", connstr=trust_connstr + ) + == "" + ), "ssl_client_dn_field() for connection without cert" + + +def _test_sslcertmode(node, common_connstr, supports_sslcertmode_require): + """ssl_client_cert_present() across the sslcertmode connection options.""" + cases = [ + ("sslcertmode=allow", "t"), + ("sslcertmode=allow sslcert=invalid", "f"), + ("sslcertmode=disable", "f"), + ] + if supports_sslcertmode_require: + cases.append(("sslcertmode=require", "t")) + + for opts, present in cases: + result = node.safe_psql( + "SELECT ssl_client_cert_present();", + connstr="{} dbname=trustdb {}".format(common_connstr, opts), + ) + assert result == present, "ssl_client_cert_present() for {}".format(opts) diff --git a/src/test/ssl/pyt/test_004_sni.py b/src/test/ssl/pyt/test_004_sni.py new file mode 100644 index 0000000000000..e9d160dd8ea6f --- /dev/null +++ b/src/test/ssl/pyt/test_004_sni.py @@ -0,0 +1,457 @@ +# Copyright (c) 2024-2026, PostgreSQL Global Development Group + +# pylint: disable=line-too-long +"""Port of src/test/ssl/t/004_sni.pl. + +Server Name Indication (SNI) support: per-host certificate selection via +pg_hosts.conf, the ssl_sni GUC, multiple/@-included hostnames, duplicate and +malformed entry rejection, passphrase-protected keys and reload behavior, the +/no_sni/ marker, and per-host client CA verification including CRL handling. +""" + +import os +import platform + +import pytest + +import pypg +from pypg.ssl_server import SSLServer + +# This suite opens up local TCP ports and is hidden behind PG_TEST_EXTRA=ssl. +pytestmark = pypg.require_test_extras("ssl") + +# The hostaddr used to connect; the server certificate is for a fixed domain so +# this cannot be a hostname. The CIDR is used to match incoming connections. 
+SERVERHOSTADDR = "127.0.0.1" +SERVERHOSTCIDR = "127.0.0.1/32" + + +def _data_dir(node): + return node.datadir + + +def test_004_sni(create_pg, tmp_path): + """SNI server: pg.conf and pg_hosts.conf host/cert selection and errors.""" + if os.environ.get("with_ssl") != "openssl": + pytest.skip("OpenSSL not supported by this build") + + ssl_server = SSLServer(tmp_path) + if ssl_server.is_libressl(): + pytest.skip("SNI not supported when building with LibreSSL") + + node = create_pg("primary", hostaddr=SERVERHOSTADDR, start=True) + exec_backend = node.safe_psql("SHOW debug_exec_backend") + + ssl_server.configure_test_server_for_ssl( + node, SERVERHOSTADDR, SERVERHOSTCIDR, "trust" + ) + ssl_server.switch_server_cert(node, certfile="server-cn-only") + + connstr = "user=ssltestuser dbname=trustdb hostaddr={} sslsni=1".format( + SERVERHOSTADDR + ) + + _test_postgresql_conf(node, connstr) + _test_pg_hosts_conf(node, connstr) + _test_passphrase_reload(node, connstr, exec_backend) + _test_non_sni_only(node, connstr) + _test_client_cas(node, ssl_server) + + +def _test_postgresql_conf(node, connstr): + """The postgresql.conf branch: cert in pg.conf used until SNI flips on.""" + node.connect_ok( + "{} sslrootcert=ssl/root+server_ca.crt sslmode=require".format(connstr), + "pg.conf: connect with correct server CA cert file sslmode=require", + ) + node.connect_fails( + "{} sslrootcert=ssl/root_ca.crt sslmode=verify-ca".format(connstr), + "pg.conf: connect fails without intermediate for sslmode=verify-ca", + expected_stderr=r"certificate verify failed", + ) + + node.append_conf( + "example.org server-cn-only.crt server-cn-only.key", "pg_hosts.conf" + ) + node.reload() + node.connect_ok( + "{} sslrootcert=ssl/root+server_ca.crt sslmode=require".format(connstr), + "pg.conf: connect with correct server CA cert file sslmode=require", + ) + + node.append_conf("ssl_sni = on", "postgresql.conf") + os.unlink(_data_dir(node) / "pg_hosts.conf") + node.reload() + node.connect_ok( + "{} 
sslrootcert=ssl/root+server_ca.crt sslmode=require".format(connstr), + "pg.conf: connect after deleting pg_hosts.conf", + ) + + +def _test_pg_hosts_conf(node, connstr): + """The pg_hosts.conf branch: default host, per-host CA, name lists, errors.""" + node.append_conf("* server-cn-only.crt server-cn-only.key", "pg_hosts.conf") + node.reload() + node.connect_ok( + "{} sslrootcert=ssl/root+server_ca.crt sslmode=require".format(connstr), + "pg_hosts.conf: connect to default, with correct server CA cert file sslmode=require", + ) + node.connect_fails( + "{} sslrootcert=ssl/root_ca.crt sslmode=verify-ca".format(connstr), + "pg_hosts.conf: connect to default, fail without intermediate for sslmode=verify-ca", + expected_stderr=r"certificate verify failed", + ) + + node.append_conf( + "example.org server-cn-only+server_ca.crt server-cn-only.key root_ca.crt", + "pg_hosts.conf", + ) + node.reload() + node.connect_ok( + "{} host=example.org sslrootcert=ssl/root_ca.crt sslmode=verify-ca".format( + connstr + ), + "pg_hosts.conf: connect to example.org and verify server CA", + ) + node.connect_ok( + "{} host=Example.ORG sslrootcert=ssl/root_ca.crt sslmode=verify-ca".format( + connstr + ), + "pg_hosts.conf: connect to Example.ORG and verify server CA", + ) + node.connect_fails( + "{} host=example.org sslrootcert=invalid sslmode=verify-ca".format(connstr), + "pg_hosts.conf: connect to example.org but without server root cert, sslmode=verify-ca", + expected_stderr=r'root certificate file "invalid" does not exist', + ) + node.connect_fails( + "{} sslrootcert=ssl/root_ca.crt sslmode=verify-ca".format(connstr), + "pg_hosts.conf: connect to default and fail to verify CA", + expected_stderr=r"certificate verify failed", + ) + node.connect_ok( + "{} sslrootcert=ssl/root+server_ca.crt sslmode=require".format(connstr), + "pg_hosts.conf: connect to default with sslmode=require", + ) + + _test_hostname_lists(node, connstr) + _test_invalid_pg_hosts(node) + + +def _test_hostname_lists(node, 
connstr): + """Multiple hostnames per entry, including @-file inclusion.""" + os.unlink(_data_dir(node) / "pg_hosts.conf") + node.append_conf( + "example.org,example.com,example.net server-cn-only+server_ca.crt " + "server-cn-only.key root_ca.crt", + "pg_hosts.conf", + ) + node.reload() + for host in ("example.org", "example.com", "example.net"): + node.connect_ok( + "{} host={} sslrootcert=ssl/root_ca.crt sslmode=verify-ca".format( + connstr, host + ), + "pg_hosts.conf: connect to {} and verify server CA".format(host), + ) + node.connect_fails( + "{} sslrootcert=ssl/root+server_ca.crt sslmode=require host=example.se".format( + connstr + ), + "pg_hosts.conf: connect to default with sslmode=require", + expected_stderr=r"unrecognized name", + ) + + os.unlink(_data_dir(node) / "pg_hosts.conf") + node.append_conf( + "example.org,@hostnames.txt server-cn-only+server_ca.crt " + "server-cn-only.key root_ca.crt", + "pg_hosts.conf", + ) + node.append_conf("\nexample.com\nexample.net\n", "hostnames.txt") + node.reload() + for host in ("example.org", "example.com", "example.net"): + node.connect_ok( + "{} host={} sslrootcert=ssl/root_ca.crt sslmode=verify-ca".format( + connstr, host + ), + "@hostnames.txt: connect to {} and verify server CA".format(host), + ) + node.connect_fails( + "{} sslrootcert=ssl/root+server_ca.crt sslmode=require host=example.se".format( + connstr + ), + "@hostnames.txt: connect to default with sslmode=require", + expected_stderr=r"unrecognized name", + ) + + +def _test_invalid_pg_hosts(node): + """Malformed/duplicate pg_hosts.conf entries make the server fail to start.""" + cases = [ + ( + "example.org,*,example.net server-cn-only+server_ca.crt " + "server-cn-only.key root_ca.crt", + "pg_hosts.conf: restart fails with default entry combined with hostnames", + ), + ( + "\n* server-cn-only.crt server-cn-only.key" + "\n* server-cn-only.crt server-cn-only.key\n", + "pg_hosts.conf: restart fails with two default entries", + ), + ( + "\n/no_sni/ 
server-cn-only.crt server-cn-only.key" + "\n/no_sni/ server-cn-only.crt server-cn-only.key\n", + "pg_hosts.conf: restart fails with two no_sni entries", + ), + ( + "\nexample.org server-cn-only.crt server-cn-only.key" + "\nexample.net server-cn-only.crt server-cn-only.key" + "\nexample.org server-cn-only.crt server-cn-only.key\n", + "pg_hosts.conf: restart fails with two identical hostname entries", + ), + ( + "\nexample.org server-cn-only.crt server-cn-only.key" + "\nexample.net,example.com,Example.org server-cn-only.crt " + "server-cn-only.key\n", + "pg_hosts.conf: restart fails with two identical hostname entries in lists", + ), + ] + for conf, msg in cases: + os.unlink(_data_dir(node) / "pg_hosts.conf") + node.append_conf(conf, "pg_hosts.conf") + assert node.restart(fail_ok=True) is False, msg + + +def _test_passphrase_reload(node, connstr, exec_backend): + """No-default host plus passphrase-protected key reload semantics.""" + os.unlink(_data_dir(node) / "pg_hosts.conf") + node.append_conf( + "example.org server-cn-only+server_ca.crt server-cn-only.key root_ca.crt", + "pg_hosts.conf", + ) + node.restart() + + node.connect_fails( + "{} sslrootcert=ssl/root+server_ca.crt sslmode=require sslsni=0".format( + connstr + ), + "pg_hosts.conf: connect to default with sslmode=require", + expected_stderr=r"handshake failure", + ) + node.connect_fails( + "{} sslrootcert=ssl/root+server_ca.crt sslmode=require host=example.com".format( + connstr + ), + "pg_hosts.conf: connect to default with sslmode=require", + expected_stderr=r"unrecognized name", + ) + node.connect_fails( + "{} sslrootcert=ssl/root+server_ca.crt sslmode=require host=example".format( + connstr + ), + "pg_hosts.conf: connect to 'example' with sslmode=require", + expected_stderr=r"unrecognized name", + ) + + # Wrong passphrase command: server must not start. 
+ os.unlink(_data_dir(node) / "pg_hosts.conf") + node.append_conf( + "localhost server-cn-only.crt server-password.key root+client_ca.crt " + '"echo wrongpassword" on', + "pg_hosts.conf", + ) + assert node.restart(fail_ok=True) is False, ( + "pg_hosts.conf: restart fails with password-protected key when using " + "the wrong passphrase command" + ) + + # Correct passphrase command: server must start. + os.unlink(_data_dir(node) / "pg_hosts.conf") + node.append_conf( + "localhost server-cn-only.crt server-password.key root+client_ca.crt " + '"echo secret1" on', + "pg_hosts.conf", + ) + assert node.restart(fail_ok=True) is True, ( + "pg_hosts.conf: restart succeeds with password-protected key when using " + "the correct passphrase command" + ) + + localhost_connstr = ( + "{} sslrootcert=ssl/root+server_ca.crt sslmode=require host=localhost".format( + connstr + ) + ) + node.connect_ok( + localhost_connstr, + "pg_hosts.conf: connect with correct server CA cert file sslmode=require", + ) + node.reload() + node.reload() + node.connect_ok( + localhost_connstr, + "pg_hosts.conf: connect with correct server CA cert file after reloads", + ) + node.reload() + node.reload() + node.connect_ok( + localhost_connstr, + "pg_hosts.conf: connect with correct server CA cert file after more reloads", + ) + + _test_passphrase_no_reload(node, localhost_connstr, exec_backend) + + +def _test_passphrase_no_reload(node, localhost_connstr, exec_backend): + """Passphrase key without reload support: restart clean, reload warns.""" + os.unlink(_data_dir(node) / "pg_hosts.conf") + node.append_conf( + "localhost server-cn-only.crt server-password.key root+client_ca.crt " + '"echo secret1" off', + "pg_hosts.conf", + ) + node_loglocation = node.current_log_position() + assert node.restart(fail_ok=True) is True, ( + "pg_hosts.conf: restart succeeds with password-protected key when using " + "the correct passphrase command" + ) + log = pypg.slurp_file(node.log, node_loglocation) + assert ( + "cannot be 
reloaded because it requires a passphrase" not in log + ), "log reload failure due to passphrase command reloading" + + windows_os = platform.system() == "Windows" + if windows_os or "on" in exec_backend: + return  # Passphrase command reload required on Windows and EXEC_BACKEND; pytest.skip() here would abort the rest of test_004_sni + + node.connect_ok( + localhost_connstr, + "pg_hosts.conf: connect with correct server CA cert file sslmode=require", + ) + node_loglocation = node.current_log_position() + node.reload() + node.connect_ok( + localhost_connstr, + "pg_hosts.conf: connect with correct server CA cert file sslmode=require", + ) + log = node.wait_for_log( + r"cannot be reloaded because it requires a passphrase", node_loglocation + ) + assert log, "log reload failure due to passphrase command reloading" + + +def _test_non_sni_only(node, connstr): + """The /no_sni/ marker: only non-SNI connections are accepted.""" + os.unlink(_data_dir(node) / "pg_hosts.conf") + node.append_conf("/no_sni/ server-cn-only.crt server-cn-only.key", "pg_hosts.conf") + node.restart() + + node.connect_ok( + "{} sslrootcert=ssl/root+server_ca.crt sslmode=require sslsni=0".format( + connstr + ), + "pg_hosts.conf: only non-SNI connections allowed", + ) + node.connect_fails( + "{} sslrootcert=ssl/root+server_ca.crt sslmode=require host=example.org".format( + connstr + ), + "pg_hosts.conf: only non-SNI connections allowed, connecting with SNI", + expected_stderr=r"unrecognized name", + ) + + +def _test_client_cas(node, ssl_server): + """Per-host client CA configuration, with global CRL dir interaction.""" + os.unlink(_data_dir(node) / "pg_hosts.conf") + node.append_conf("ssl_ca_file = 'root+client_ca.crt'", "postgresql.conf") + node.append_conf( + "* server-cn-only.crt server-cn-only.key root+client_ca.crt", "pg_hosts.conf" + ) + node.append_conf( + "example.org server-cn-only.crt server-cn-only.key", "pg_hosts.conf" + ) + node.append_conf( + "example.com server-cn-only.crt server-cn-only.key root+client_ca.crt", + "pg_hosts.conf", + ) + 
node.append_conf( + "example.net server-cn-only.crt server-cn-only.key root+server_ca.crt", + "pg_hosts.conf", + ) + node.restart() + + connstr = ( + "user=ssltestuser dbname=certdb hostaddr={} sslmode=require sslsni=1".format( + SERVERHOSTADDR + ) + ) + + node.connect_fails( + "{} host=example.org sslcertmode=require sslcert=ssl/client.crt{}".format( + connstr, ssl_server.sslkey("client.key") + ), + "host: 'example.org', ca: '': connect with sslcert, no client CA configured", + expected_stderr=r"client certificates can only be checked if a root certificate store is available", + ) + node.connect_fails( + "{} host=example.com sslcertmode=disable".format(connstr), + "host: 'example.com', ca: 'root+client_ca.crt': connect fails if no client certificate sent", + expected_stderr=r"connection requires a valid client certificate", + ) + node.connect_ok( + "{} host=example.com sslcertmode=require sslcert=ssl/client.crt {}".format( + connstr, ssl_server.sslkey("client.key") + ), + "host: 'example.com', ca: 'root+client_ca.crt': connect with sslcert, client certificate sent", + ) + node.connect_fails( + "{} host=example.net sslcertmode=disable".format(connstr), + "host: 'example.net', ca: 'root+server_ca.crt': connect fails if no client certificate sent", + expected_stderr=r"connection requires a valid client certificate", + ) + node.connect_fails( + "{} host=example.net sslcertmode=require sslcert=ssl/client.crt {}".format( + connstr, ssl_server.sslkey("client.key") + ), + "host: 'example.net', ca: 'root+server_ca.crt': connect with sslcert, client certificate sent", + expected_stderr=r"unknown ca", + ) + + # Global CRL dir interacts with per-host trust. 
+ ssl_server.switch_server_cert( + node, certfile="server-cn-only", crldir="client-crldir" + ) + node.connect_fails( + "{} host=example.com sslcertmode=require sslcert=ssl/client-revoked.crt {}".format( + connstr, ssl_server.sslkey("client-revoked.key") + ), + "host: 'example.com', ca: 'root+client_ca.crt': connect fails with revoked client cert", + expected_stderr=r"certificate revoked", + ) + + _test_client_cas_eol(node) + + +def _test_client_cas_eol(node): + """Trailing/garbage tokens in pg_hosts.conf entries fail server start.""" + os.unlink(_data_dir(node) / "pg_hosts.conf") + node.append_conf( + "example.org server-cn-only.crt server-cn-only.key root+client_ca.crt " + '"cmd" on TRAILING_TEXT MORE_TEXT', + "pg_hosts.conf", + ) + assert ( + node.restart(fail_ok=True) is False + ), "pg_hosts.conf: restart fails with extra data at EOL" + + os.unlink(_data_dir(node) / "pg_hosts.conf") + node.append_conf( + "example.org server-cn-only.crt server-cn-only.key root+client_ca.crt " + '"cmd" notabooleanvalue', + "pg_hosts.conf", + ) + assert ( + node.restart(fail_ok=True) is False + ), "pg_hosts.conf: restart fails with non-boolean value in boolean field" diff --git a/src/test/ssl/pyt/test_client.py b/src/test/ssl/pyt/test_client.py index 4113dd21752a6..970668667885d 100644 --- a/src/test/ssl/pyt/test_client.py +++ b/src/test/ssl/pyt/test_client.py @@ -121,6 +121,7 @@ def _join(self): # racing against the test's own use of remaining_timeout(). (It's # preferable to let tests report timeouts; the stack traces will # help with debugging.) 
+ assert self._thread is not None # set by background() self._thread.join(remaining_timeout() + 1) if self._thread.is_alive(): raise TimeoutError("background thread is still running after timeout") diff --git a/src/test/ssl/pyt/test_server.py b/src/test/ssl/pyt/test_server.py index d5cb14b6c9ac7..fe1d0eb7896ca 100644 --- a/src/test/ssl/pyt/test_server.py +++ b/src/test/ssl/pyt/test_server.py @@ -155,6 +155,7 @@ def test_direct_ssl_certificate_authentication( elif key == b"M": msg = val.decode() + assert msg is not None, "server did not send an error message" assert re.search(expected_error, msg), "server error did not match" # Terminate. From 7b77c0154d1a8570f1f37becaac056e3b931e2bc Mon Sep 17 00:00:00 2001 From: Greg Burd Date: Sun, 14 Jun 2026 11:16:35 -0400 Subject: [PATCH 17/36] pytest: port postmaster and ICU TAP suites Port src/test/postmaster (connection limits, negotiation) and src/test/icu to pytest. Co-authored-by: Greg Burd --- src/test/icu/meson.build | 6 ++ src/test/icu/pyt/test_010_database.py | 67 +++++++++++++ src/test/postmaster/meson.build | 8 ++ src/test/postmaster/pyt/test_001_basic.py | 15 +++ .../pyt/test_002_connection_limits.py | 97 +++++++++++++++++++ .../postmaster/pyt/test_003_start_stop.py | 61 ++++++++++++ src/test/postmaster/pyt/test_004_negotiate.py | 48 +++++++++ 7 files changed, 302 insertions(+) create mode 100644 src/test/icu/pyt/test_010_database.py create mode 100644 src/test/postmaster/pyt/test_001_basic.py create mode 100644 src/test/postmaster/pyt/test_002_connection_limits.py create mode 100644 src/test/postmaster/pyt/test_003_start_stop.py create mode 100644 src/test/postmaster/pyt/test_004_negotiate.py diff --git a/src/test/icu/meson.build b/src/test/icu/meson.build index d2cff55220a53..173911c9d5de4 100644 --- a/src/test/icu/meson.build +++ b/src/test/icu/meson.build @@ -4,6 +4,12 @@ tests += { 'name': 'icu', 'sd': meson.current_source_dir(), 'bd': meson.current_build_dir(), + 'pytest': { + 'tests': [ + 
'pyt/test_010_database.py', + ], + 'env': {'with_icu': icu.found() ? 'yes' : 'no'}, + }, 'tap': { 'tests': [ 't/010_database.pl', diff --git a/src/test/icu/pyt/test_010_database.py b/src/test/icu/pyt/test_010_database.py new file mode 100644 index 0000000000000..b57add4c18939 --- /dev/null +++ b/src/test/icu/pyt/test_010_database.py @@ -0,0 +1,67 @@ +# Copyright (c) 2022-2026, PostgreSQL Global Development Group + +# pylint: disable=line-too-long,too-many-statements +"""Port of src/test/icu/t/010_database.pl. + +ICU per-database collation: databases created with LOCALE_PROVIDER icu sort correctly by default/explicit collations, C and custom ICU locales work, and a CREATE DATABASE whose locale provider differs from its template is rejected. +Generated from the Perl original via .agent/gen_golden.py. +""" + +import os +import pytest +import re + + +def test_010_database(create_pg): + """ICU per-database collation and locale-provider template matching.""" + if os.environ.get("with_icu") != "yes": + pytest.skip("ICU not supported by this build") + node1 = create_pg("node1", start=False) + node1.start() + node1.safe_psql( + "CREATE DATABASE dbicu LOCALE_PROVIDER icu LOCALE 'C' ICU_LOCALE 'en@colCaseFirst=upper' ENCODING 'UTF8' TEMPLATE template0" + ) + node1.safe_psql( + "CREATE COLLATION upperfirst (provider = icu, locale = 'en@colCaseFirst=upper');\nCREATE TABLE icu (def text, en text COLLATE \"en-x-icu\", upfirst text COLLATE upperfirst);\nINSERT INTO icu VALUES ('a', 'a', 'a'), ('b', 'b', 'b'), ('A', 'A', 'A'), ('B', 'B', 'B');", + dbname="dbicu", + ) + assert ( + node1.safe_psql("SELECT icu_unicode_version() IS NOT NULL", dbname="dbicu") + == "t" + ), "ICU unicode version defined" + assert ( + node1.safe_psql("SELECT def FROM icu ORDER BY def", dbname="dbicu") + == "A\na\nB\nb" + ), "sort by database default locale" + assert ( + node1.safe_psql( + 'SELECT def FROM icu ORDER BY def COLLATE "en-x-icu"', dbname="dbicu" + ) + == "a\nA\nb\nB" + ), "sort by explicit 
collation standard" + assert ( + node1.safe_psql( + "SELECT def FROM icu ORDER BY en COLLATE upperfirst", dbname="dbicu" + ) + == "A\na\nB\nb" + ), "sort by explicit collation upper first" + assert ( + node1.psql_capture( + "CREATE DATABASE dbicu1 LOCALE_PROVIDER icu LOCALE 'C' TEMPLATE template0 ENCODING UTF8" + ).rc + == 0 + ), "C locale works for ICU" + assert ( + node1.psql_capture( + "CREATE DATABASE dbicu2 LOCALE_PROVIDER icu LOCALE '@colStrength=primary'\n LC_COLLATE='C' LC_CTYPE='C' TEMPLATE template0 ENCODING UTF8" + ).rc + == 0 + ), "LOCALE works for ICU locales if LC_COLLATE and LC_CTYPE are specified" + result = node1.psql_capture( + "CREATE DATABASE dbicu3 LOCALE_PROVIDER builtin LOCALE 'C' TEMPLATE dbicu" + ) + assert result.rc != 0, "locale provider must match template: exit code not 0" + assert re.search( + r"""ERROR: new locale provider \(builtin\) does not match locale provider of the template database \(icu\)""", + result.stderr, + ), "locale provider must match template: error message" diff --git a/src/test/postmaster/meson.build b/src/test/postmaster/meson.build index fa30883b601bd..2b9219d5bd3f8 100644 --- a/src/test/postmaster/meson.build +++ b/src/test/postmaster/meson.build @@ -4,6 +4,14 @@ tests += { 'name': 'postmaster', 'sd': meson.current_source_dir(), 'bd': meson.current_build_dir(), + 'pytest': { + 'tests': [ + 'pyt/test_001_basic.py', + 'pyt/test_002_connection_limits.py', + 'pyt/test_003_start_stop.py', + 'pyt/test_004_negotiate.py', + ], + }, 'tap': { 'tests': [ 't/001_basic.pl', diff --git a/src/test/postmaster/pyt/test_001_basic.py b/src/test/postmaster/pyt/test_001_basic.py new file mode 100644 index 0000000000000..731e0ebc4f5e8 --- /dev/null +++ b/src/test/postmaster/pyt/test_001_basic.py @@ -0,0 +1,15 @@ +# Copyright (c) 2022-2026, PostgreSQL Global Development Group + +# pylint: disable=line-too-long,too-many-statements +"""Port of src/test/postmaster/t/001_basic.pl. 
+ +postgres (postmaster) --help / --version / invalid-option handling. +Generated from the Perl original via .agent/gen_golden.py. +""" + + +def test_001_basic(pg_bin): + """postgres (postmaster) --help / --version / invalid-option handling.""" + pg_bin.program_help_ok("postgres") + pg_bin.program_version_ok("postgres") + pg_bin.program_options_handling_ok("postgres") diff --git a/src/test/postmaster/pyt/test_002_connection_limits.py b/src/test/postmaster/pyt/test_002_connection_limits.py new file mode 100644 index 0000000000000..54e17205e1398 --- /dev/null +++ b/src/test/postmaster/pyt/test_002_connection_limits.py @@ -0,0 +1,97 @@ +# Copyright (c) 2023-2026, PostgreSQL Global Development Group + +"""Port of src/test/postmaster/t/002_connection_limits.pl. + +Connection-slot reservation tiers: with max_connections=6, +reserved_connections=2, superuser_reserved_connections=1, regular users are +refused once the unreserved slots fill (reserved for pg_use_reserved_connections +roles), then reserved-privilege users are refused (slots reserved for +superusers), then superusers hit "too many clients already". Finally dead-end +raw connections are accepted up to the listen backlog.
+""" + +import struct +import sys + +import pytest + + +def _bg_as(node, user): + return node.background_psql( + "postgres", on_error_stop=True, extra_params=["--username", user] + ) + + +def _connect_fails_wait(node, connstr, test_name, expected_stderr): + log_location = node.current_log_position() + node.connect_fails(connstr, test_name, expected_stderr=expected_stderr) + node.wait_for_log( + r"DEBUG: (00000: )?client backend.*exited with exit code 1", log_location + ) + + +@pytest.mark.skipif(sys.platform == "win32", reason="requires raw_connect()") +def test_002_connection_limits(create_pg): + """Reserved-connection tiers refuse the right roles as slots fill.""" + node = create_pg( + "primary", + auth_extra=[ + "--create-role", + "regress_regular,regress_reserved,regress_superuser", + ], + start=False, + ) + for line in ( + "max_connections = 6", + "reserved_connections = 2", + "superuser_reserved_connections = 1", + "log_connections = 'receipt,authentication,authorization'", + "log_min_messages=debug2", + ): + node.append_conf(line) + node.start() + node.safe_psql( + "CREATE USER regress_regular LOGIN;\nCREATE USER regress_reserved LOGIN;\n" + "GRANT pg_use_reserved_connections TO regress_reserved;\n" + "CREATE USER regress_superuser LOGIN SUPERUSER;\n" + ) + node.restart() + sessions = [] + sessions.append(_bg_as(node, "regress_regular")) + sessions.append(_bg_as(node, "regress_regular")) + sessions.append(_bg_as(node, "regress_regular")) + _connect_fails_wait( + node, + "dbname=postgres user=regress_regular", + "regular connections limit", + r"FATAL: remaining connection slots are reserved for roles with " + r'privileges of the "pg_use_reserved_connections" role', + ) + sessions.append(_bg_as(node, "regress_reserved")) + sessions.append(_bg_as(node, "regress_reserved")) + _connect_fails_wait( + node, + "dbname=postgres user=regress_reserved", + "reserved_connections limit", + r"FATAL: remaining connection slots are reserved for roles with the " + r"SUPERUSER 
attribute", + ) + sessions.append(_bg_as(node, "regress_superuser")) + _connect_fails_wait( + node, + "dbname=postgres user=regress_superuser", + "superuser_reserved_connections limit", + r"FATAL: sorry, too many clients already", + ) + raw_connections = [] + if node.raw_connect_works(): + negotiate_ssl = struct.pack(">IHH", 8, 1234, 5679) + for i in range(21): + sock = node.raw_connect() + sock.send(negotiate_ssl) + assert sock.recv(1) == b"N", "dead-end connection {}".format(i) + raw_connections.append(sock) + for session in sessions: + session.quit() + for sock in raw_connections: + sock.close() diff --git a/src/test/postmaster/pyt/test_003_start_stop.py b/src/test/postmaster/pyt/test_003_start_stop.py new file mode 100644 index 0000000000000..7224a562e6708 --- /dev/null +++ b/src/test/postmaster/pyt/test_003_start_stop.py @@ -0,0 +1,61 @@ +# Copyright (c) 2023-2026, PostgreSQL Global Development Group + +"""Port of src/test/postmaster/t/003_start_stop.pl. + +With a tiny connection budget, opening dead-end raw connections (each probing +SSL negotiation, which the postmaster rejects with 'N' once the backend is +launched) eventually exhausts the slots, so a real connection is rejected with +"too many clients already". After a fast stop and restart, normal connections +work again. 
+""" + +import struct +import sys + +import pytest + +import pypg + + +@pytest.mark.skipif(sys.platform == "win32", reason="requires raw_connect()") +def test_003_start_stop(create_pg): + """Dead-end connections exhaust slots; restart restores connectivity.""" + auth_timeout = max(pypg.test_timeout_default(), 600) + node = create_pg("main", start=False) + for line in ( + "max_connections = 5", + "max_wal_senders = 0", + "autovacuum_max_workers = 1", + "max_worker_processes = 1", + "log_connections = 'receipt,authentication,authorization'", + "log_min_messages = debug2", + "authentication_timeout = '{} s'".format(auth_timeout), + "trace_connection_negotiation=on", + ): + node.append_conf(line) + node.start() + if not node.raw_connect_works(): + pytest.skip("this test requires working raw_connect()") + node.restart() + negotiate_ssl = struct.pack(">IHH", 8, 1234, 5679) + raw_connections = [] + for i in range(21): + sock = node.raw_connect() + # Probe SSL negotiation before opening the next connection: the server + # rejects it with 'N', proving the backend was launched and we can open + # another connection reliably. + sock.send(negotiate_ssl) + assert sock.recv(1) == b"N", "dead-end connection {}".format(i) + raw_connections.append(sock) + node.connect_fails( + "dbname=postgres user=invalid_user", + "connection is rejected when all slots are in use", + expected_stderr=r"FATAL: sorry, too many clients already", + ) + extra = node.raw_connect() + node.stop("fast") + node.start() + node.connect_ok("dbname=postgres", "works after restart") + for sock in raw_connections: + sock.close() + extra.close() diff --git a/src/test/postmaster/pyt/test_004_negotiate.py b/src/test/postmaster/pyt/test_004_negotiate.py new file mode 100644 index 0000000000000..d87c840faaf57 --- /dev/null +++ b/src/test/postmaster/pyt/test_004_negotiate.py @@ -0,0 +1,48 @@ +# Copyright (c) 2024-2026, PostgreSQL Global Development Group + +"""Port of src/test/postmaster/t/004_negotiate.pl. 
+ +On a raw connection the server rejects SSLRequest and GSSENCRequest packets with +'N' (no SSL/GSS support in this build). After both were tried, a further +SSLRequest must not re-enter SSL negotiation; the server logs the unsupported +protocol and stays alive, still accepting normal connections. +""" + +import struct +import sys + +import pytest + + +@pytest.mark.skipif(sys.platform == "win32", reason="requires raw_connect()") +def test_004_negotiate(create_pg): + """Server rejects SSL/GSS requests and survives a bad negotiation attempt.""" + node = create_pg("main", start=False) + node.append_conf("log_min_messages = debug2") + node.append_conf("log_connections = 'receipt,authentication,authorization'") + node.append_conf("trace_connection_negotiation=on") + node.start() + if not node.raw_connect_works(): + pytest.skip("this test requires working raw_connect()") + sock = node.raw_connect() + ssl_request = struct.pack(">IHH", 8, 1234, 5679) + gss_request = struct.pack(">IHH", 8, 1234, 5680) + sock.send(ssl_request) + if sock.recv(1) != b"N": + sock.close() + pytest.skip("server accepted SSL; test requires SSL to be rejected") + sock.send(gss_request) + if sock.recv(1) != b"N": + sock.close() + pytest.skip("server accepted GSS; test requires GSS to be rejected") + log_offset = node.current_log_position() + sock.send(ssl_request) + reply = sock.recv(1024) + assert ( + reply != b"N" + ), "server does not re-enter SSL negotiation after SSL+GSS were both tried" + sock.close() + node.wait_for_log(r"FATAL: .* unsupported frontend protocol 1234.5679", log_offset) + assert node.safe_psql("select 1;") == "1", "server able to accept connection" + assert node.is_alive(), "server still running after negotiation attempt" + node.stop() From c46374a0030102b4dacf0df735f9e5dccda9142e Mon Sep 17 00:00:00 2001 From: Greg Burd Date: Sun, 14 Jun 2026 11:16:35 -0400 Subject: [PATCH 18/36] pytest: port libpq, libpq-oauth, and ecpg TAP suites Port the interface-library TAP suites (libpq 
negotiate-encryption and load-balance, libpq-oauth, ecpg) to pytest. The mock OAuth server is shared via the oauth_validator helper. Co-authored-by: Greg Burd --- src/interfaces/ecpg/preproc/meson.build | 7 + .../preproc/pyt/test_001_ecpg_err_warn_msg.py | 39 + .../test_002_ecpg_err_warn_msg_informix.py | 22 + src/interfaces/libpq-oauth/meson.build | 6 + .../libpq-oauth/pyt/test_001_oauth.py | 18 + src/interfaces/libpq/meson.build | 12 + src/interfaces/libpq/pyt/test_001_uri.py | 302 ++++++++ src/interfaces/libpq/pyt/test_002_api.py | 23 + .../pyt/test_005_negotiate_encryption.py | 682 ++++++++++++++++++ src/interfaces/libpq/pyt/test_006_service.py | 330 +++++++++ src/interfaces/libpq/pyt/test_load_balance.py | 6 +- .../libpq/t/003_load_balance_host_list.pl | 94 +++ .../libpq/t/004_load_balance_dns.pl | 144 ++++ 13 files changed, 1682 insertions(+), 3 deletions(-) create mode 100644 src/interfaces/ecpg/preproc/pyt/test_001_ecpg_err_warn_msg.py create mode 100644 src/interfaces/ecpg/preproc/pyt/test_002_ecpg_err_warn_msg_informix.py create mode 100644 src/interfaces/libpq-oauth/pyt/test_001_oauth.py create mode 100644 src/interfaces/libpq/pyt/test_001_uri.py create mode 100644 src/interfaces/libpq/pyt/test_002_api.py create mode 100644 src/interfaces/libpq/pyt/test_005_negotiate_encryption.py create mode 100644 src/interfaces/libpq/pyt/test_006_service.py create mode 100644 src/interfaces/libpq/t/003_load_balance_host_list.pl create mode 100644 src/interfaces/libpq/t/004_load_balance_dns.pl diff --git a/src/interfaces/ecpg/preproc/meson.build b/src/interfaces/ecpg/preproc/meson.build index 3a56e2bb4ef1e..2cdfe5e49ae86 100644 --- a/src/interfaces/ecpg/preproc/meson.build +++ b/src/interfaces/ecpg/preproc/meson.build @@ -91,6 +91,13 @@ tests += { 'name': 'ecpg', 'sd': meson.current_source_dir(), 'bd': meson.current_build_dir(), + 'pytest': { + 'tests': [ + 'pyt/test_001_ecpg_err_warn_msg.py', + 'pyt/test_002_ecpg_err_warn_msg_informix.py', + ], + 'deps': [ecpg_exe], + 
}, 'tap': { 'tests': [ 't/001_ecpg_err_warn_msg.pl', diff --git a/src/interfaces/ecpg/preproc/pyt/test_001_ecpg_err_warn_msg.py b/src/interfaces/ecpg/preproc/pyt/test_001_ecpg_err_warn_msg.py new file mode 100644 index 0000000000000..0bd2afb40c6b5 --- /dev/null +++ b/src/interfaces/ecpg/preproc/pyt/test_001_ecpg_err_warn_msg.py @@ -0,0 +1,39 @@ +# Copyright (c) 2022-2026, PostgreSQL Global Development Group + +# pylint: disable=line-too-long,too-many-statements +"""Port of src/interfaces/ecpg/preproc/t/001_ecpg_err_warn_msg.pl. + +ecpg preprocessor error/warning messages: ecpg with no arguments fails, and compiling t/err_warn_msg.pgc fails with the expected diagnostic set. +Generated from the Perl original via .agent/gen_golden.py. +""" + + +def test_001_ecpg_err_warn_msg(pg_bin): + """ecpg preprocessor error/warning messages.""" + pg_bin.program_help_ok("ecpg") + pg_bin.program_version_ok("ecpg") + pg_bin.program_options_handling_ok("ecpg") + pg_bin.command_fails(["ecpg"], "ecpg without arguments fails") + pg_bin.command_checks_all( + ["ecpg", "t/err_warn_msg.pgc"], + 3, + [r""""""], + [ + r"""ERROR: AT option not allowed in CONNECT statement""", + r"""ERROR: AT option not allowed in DISCONNECT statement""", + r"""ERROR: AT option not allowed in SET CONNECTION statement""", + r"""ERROR: AT option not allowed in TYPE statement""", + r"""ERROR: AT option not allowed in WHENEVER statement""", + r"""ERROR: AT option not allowed in VAR statement""", + r"""WARNING: COPY FROM STDIN is not implemented""", + r"""ERROR: using variable "cursor_var" in different declare statements is not supported""", + r"""ERROR: cursor "duplicate_cursor" is already defined""", + r"""ERROR: SHOW ALL is not implemented""", + r"""WARNING: no longer supported LIMIT""", + r"""WARNING: cursor "duplicate_cursor" has been declared but not opened""", + r"""WARNING: cursor "duplicate_cursor" has been declared but not opened""", + r"""WARNING: cursor ":cursor_var" has been declared but not opened""", 
+ r"""WARNING: cursor ":cursor_var" has been declared but not opened""", + ], + "ecpg with errors and warnings", + ) diff --git a/src/interfaces/ecpg/preproc/pyt/test_002_ecpg_err_warn_msg_informix.py b/src/interfaces/ecpg/preproc/pyt/test_002_ecpg_err_warn_msg_informix.py new file mode 100644 index 0000000000000..91d25bb4233b2 --- /dev/null +++ b/src/interfaces/ecpg/preproc/pyt/test_002_ecpg_err_warn_msg_informix.py @@ -0,0 +1,22 @@ +# Copyright (c) 2022-2026, PostgreSQL Global Development Group + +# pylint: disable=line-too-long,too-many-statements +"""Port of src/interfaces/ecpg/preproc/t/002_ecpg_err_warn_msg_informix.pl. + +ecpg preprocessor error/warning messages in INFORMIX mode: compiling t/err_warn_msg_informix.pgc fails with the expected diagnostic set. +Generated from the Perl original via .agent/gen_golden.py. +""" + + +def test_002_ecpg_err_warn_msg_informix(pg_bin): + """ecpg preprocessor error/warning messages in INFORMIX mode.""" + pg_bin.command_checks_all( + ["ecpg", "-C", "INFORMIX", "t/err_warn_msg_informix.pgc"], + 3, + [r""""""], + [ + r"""ERROR: AT option not allowed in CLOSE DATABASE statement""", + r"""ERROR: "database" cannot be used as cursor name in INFORMIX mode""", + ], + "ecpg in INFORMIX mode with errors and warnings", + ) diff --git a/src/interfaces/libpq-oauth/meson.build b/src/interfaces/libpq-oauth/meson.build index ea3a900f4f18a..41721f5ddae15 100644 --- a/src/interfaces/libpq-oauth/meson.build +++ b/src/interfaces/libpq-oauth/meson.build @@ -93,4 +93,10 @@ tests += { ], 'deps': libpq_oauth_test_deps, }, + 'pytest': { + 'tests': [ + 'pyt/test_001_oauth.py', + ], + 'deps': libpq_oauth_test_deps, + }, } diff --git a/src/interfaces/libpq-oauth/pyt/test_001_oauth.py b/src/interfaces/libpq-oauth/pyt/test_001_oauth.py new file mode 100644 index 0000000000000..3dd414f9a7640 --- /dev/null +++ b/src/interfaces/libpq-oauth/pyt/test_001_oauth.py @@ -0,0 +1,18 @@ +# Copyright (c) 2025-2026, PostgreSQL Global Development Group + +"""Port of 
src/interfaces/libpq-oauth/t/001_oauth.pl. + +Defers entirely to the oauth_tests C executable (a unit-test program for the +libpq-oauth client flow). The wrapper runs it and requires a successful exit; +its stdout/stderr are captured and surfaced on failure for debugging. +""" + +import sys + + +def test_001_oauth(pg_bin): + """The oauth_tests executable must run to a successful exit.""" + result = pg_bin.result(["oauth_tests"]) + sys.stdout.write(result.stdout) + sys.stderr.write(result.stderr) + assert result.rc == 0, "oauth_tests returned {}".format(result.rc) diff --git a/src/interfaces/libpq/meson.build b/src/interfaces/libpq/meson.build index 62cde97d16931..161e2e3dd678e 100644 --- a/src/interfaces/libpq/meson.build +++ b/src/interfaces/libpq/meson.build @@ -157,6 +157,8 @@ tests += { 'tests': [ 't/001_uri.pl', 't/002_api.pl', + 't/003_load_balance_host_list.pl', + 't/004_load_balance_dns.pl', 't/005_negotiate_encryption.pl', 't/006_service.pl', ], @@ -170,7 +172,17 @@ tests += { 'pytest': { 'tests': [ 'pyt/test_load_balance.py', + 'pyt/test_002_api.py', + 'pyt/test_001_uri.py', + 'pyt/test_005_negotiate_encryption.py', + 'pyt/test_006_service.py', ], + 'env': { + 'with_ssl': ssl_library, + 'with_gssapi': gssapi.found() ? 'yes' : 'no', + 'with_krb_srvnam': 'postgres', + }, + 'deps': libpq_test_deps, }, } diff --git a/src/interfaces/libpq/pyt/test_001_uri.py b/src/interfaces/libpq/pyt/test_001_uri.py new file mode 100644 index 0000000000000..e8d25fc702729 --- /dev/null +++ b/src/interfaces/libpq/pyt/test_001_uri.py @@ -0,0 +1,302 @@ +# Copyright (c) 2021-2026, PostgreSQL Global Development Group + +"""Port of src/interfaces/libpq/t/001_uri.pl. + +Drives the libpq_uri_regress helper with a long table of connection URIs and +checks, for each, the program's stdout (the parsed connection options), stderr +(libpq's URI parse error, if any) and exit status. A non-empty expected stderr +means the URI is invalid and the program is expected to exit non-zero. 
+ +Some entries override environment variables for the duration of the test; the +three sslmode entries set PGSSLROOTCERT=system to exercise the system-CA path, +where the default sslmode becomes verify-full. +""" + +from pypg import PgBin + +# pylint: disable=line-too-long +# The URI strings and libpq's verbatim error messages cannot be wrapped without +# changing the assertions, so long lines are tolerated in this module. + +# List of URI tests. For each test the first element is the input string, the +# second the expected stdout and the third the expected stderr. An optional +# fourth element is a dict of environment variables to override for the test. +_TESTS = [ + ( + r"postgresql://uri-user:secret@host:12345/db", + r"user='uri-user' password='secret' dbname='db' host='host' port='12345' (inet)", + r"", + ), + ( + r"postgresql://uri-user@host:12345/db", + r"user='uri-user' dbname='db' host='host' port='12345' (inet)", + r"", + ), + ( + r"postgresql://uri-user@host/db", + r"user='uri-user' dbname='db' host='host' (inet)", + r"", + ), + ( + r"postgresql://host:12345/db", + r"dbname='db' host='host' port='12345' (inet)", + r"", + ), + (r"postgresql://host/db", r"dbname='db' host='host' (inet)", r""), + ( + r"postgresql://uri-user@host:12345/", + r"user='uri-user' host='host' port='12345' (inet)", + r"", + ), + ( + r"postgresql://uri-user@host/", + r"user='uri-user' host='host' (inet)", + r"", + ), + (r"postgresql://uri-user@", r"user='uri-user' (local)", r""), + (r"postgresql://host:12345/", r"host='host' port='12345' (inet)", r""), + (r"postgresql://host:12345", r"host='host' port='12345' (inet)", r""), + (r"postgresql://host/db", r"dbname='db' host='host' (inet)", r""), + (r"postgresql://host/", r"host='host' (inet)", r""), + (r"postgresql://host", r"host='host' (inet)", r""), + (r"postgresql://", r"(local)", r""), + ( + r"postgresql://?hostaddr=127.0.0.1", + r"hostaddr='127.0.0.1' (inet)", + r"", + ), + ( + r"postgresql://example.com?hostaddr=63.1.2.4", + 
r"host='example.com' hostaddr='63.1.2.4' (inet)", + r"", + ), + (r"postgresql://%68ost/", r"host='host' (inet)", r""), + ( + r"postgresql://host/db?user=uri-user", + r"user='uri-user' dbname='db' host='host' (inet)", + r"", + ), + ( + r"postgresql://host/db?user=uri-user&port=12345", + r"user='uri-user' dbname='db' host='host' port='12345' (inet)", + r"", + ), + ( + r"postgresql://host/db?u%73er=someotheruser&port=12345", + r"user='someotheruser' dbname='db' host='host' port='12345' (inet)", + r"", + ), + ( + r"postgresql://host/db?u%7aer=someotheruser&port=12345", + r"", + r'libpq_uri_regress: invalid URI query parameter: "uzer"', + ), + ( + r"postgresql://host:12345?user=uri-user", + r"user='uri-user' host='host' port='12345' (inet)", + r"", + ), + ( + r"postgresql://host?user=uri-user", + r"user='uri-user' host='host' (inet)", + r"", + ), + ( + # Leading and trailing spaces, works. + r"postgresql://host? user = uri-user & port = 12345 ", + r"user='uri-user' host='host' port='12345' (inet)", + r"", + ), + ( + # Trailing data in parameter. + r"postgresql://host? user user = uri & port = 12345 12 ", + r"", + r'libpq_uri_regress: unexpected spaces found in " user user ", use percent-encoded spaces (%20) instead', + ), + ( + # Trailing data in value. + r"postgresql://host? 
user = uri-user & port = 12345 12 ", + r"", + r'libpq_uri_regress: unexpected spaces found in " 12345 12 ", use percent-encoded spaces (%20) instead', + ), + (r"postgresql://host?", r"host='host' (inet)", r""), + ( + r"postgresql://[::1]:12345/db", + r"dbname='db' host='::1' port='12345' (inet)", + r"", + ), + (r"postgresql://[::1]/db", r"dbname='db' host='::1' (inet)", r""), + ( + r"postgresql://[2001:db8::1234]/", + r"host='2001:db8::1234' (inet)", + r"", + ), + ( + r"postgresql://[200z:db8::1234]/", + r"host='200z:db8::1234' (inet)", + r"", + ), + (r"postgresql://[::1]", r"host='::1' (inet)", r""), + (r"postgres://", r"(local)", r""), + (r"postgres:///", r"(local)", r""), + (r"postgres:///db", r"dbname='db' (local)", r""), + ( + r"postgres://uri-user@/db", + r"user='uri-user' dbname='db' (local)", + r"", + ), + ( + r"postgres://?host=/path/to/socket/dir", + r"host='/path/to/socket/dir' (local)", + r"", + ), + ( + r"postgresql://host?uzer=", + r"", + r'libpq_uri_regress: invalid URI query parameter: "uzer"', + ), + ( + r"postgre://", + r"", + r'libpq_uri_regress: missing "=" after "postgre://" in connection info string', + ), + ( + r"postgres://[::1", + r"", + r'libpq_uri_regress: end of string reached when looking for matching "]" in IPv6 host address in URI: "postgres://[::1"', + ), + ( + r"postgres://[]", + r"", + r'libpq_uri_regress: IPv6 host address may not be empty in URI: "postgres://[]"', + ), + ( + r"postgres://[::1]z", + r"", + r'libpq_uri_regress: unexpected character "z" at position 17 in URI (expected ":" or "/"): "postgres://[::1]z"', + ), + ( + r"postgresql://host?zzz", + r"", + r'libpq_uri_regress: missing key/value separator "=" in URI query parameter: "zzz"', + ), + ( + r"postgresql://host?value1&value2", + r"", + r'libpq_uri_regress: missing key/value separator "=" in URI query parameter: "value1"', + ), + ( + r"postgresql://host?key=key=value", + r"", + r'libpq_uri_regress: extra key/value separator "=" in URI query parameter: "key"', + ), + 
( + r"postgres://host?dbname=%XXfoo", + r"", + r'libpq_uri_regress: invalid percent-encoded token: "%XXfoo"', + ), + ( + r"postgresql://a%00b", + r"", + r'libpq_uri_regress: forbidden value %00 in percent-encoded value: "a%00b"', + ), + ( + r"postgresql://%zz", + r"", + r'libpq_uri_regress: invalid percent-encoded token: "%zz"', + ), + ( + r"postgresql://%1", + r"", + r'libpq_uri_regress: invalid percent-encoded token: "%1"', + ), + ( + r"postgresql://%", + r"", + r'libpq_uri_regress: invalid percent-encoded token: "%"', + ), + (r"postgres://@host", r"host='host' (inet)", r""), + (r"postgres://host:/", r"host='host' (inet)", r""), + (r"postgres://:12345/", r"port='12345' (local)", r""), + ( + r"postgres://otheruser@?host=/no/such/directory", + r"user='otheruser' host='/no/such/directory' (local)", + r"", + ), + ( + r"postgres://otheruser@/?host=/no/such/directory", + r"user='otheruser' host='/no/such/directory' (local)", + r"", + ), + ( + r"postgres://otheruser@:12345?host=/no/such/socket/path", + r"user='otheruser' host='/no/such/socket/path' port='12345' (local)", + r"", + ), + ( + r"postgres://otheruser@:12345/db?host=/path/to/socket", + r"user='otheruser' dbname='db' host='/path/to/socket' port='12345' (local)", + r"", + ), + ( + r"postgres://:12345/db?host=/path/to/socket", + r"dbname='db' host='/path/to/socket' port='12345' (local)", + r"", + ), + ( + r"postgres://:12345?host=/path/to/socket", + r"host='/path/to/socket' port='12345' (local)", + r"", + ), + ( + r"postgres://%2Fvar%2Flib%2Fpostgresql/dbname", + r"dbname='dbname' host='/var/lib/postgresql' (local)", + r"", + ), + # Usually the default sslmode is 'prefer' (for libraries with SSL) or + # 'disable' (for those without). This default changes to 'verify-full' if + # the system CA store is in use. 
+ ( + r"postgresql://host?sslmode=disable", + r"host='host' sslmode='disable' (inet)", + r"", + {"PGSSLROOTCERT": "system"}, + ), + ( + r"postgresql://host?sslmode=prefer", + r"host='host' sslmode='prefer' (inet)", + r"", + {"PGSSLROOTCERT": "system"}, + ), + ( + r"postgresql://host?sslmode=verify-full", + r"host='host' (inet)", + r"", + {"PGSSLROOTCERT": "system"}, + ), +] + + +def _run_uri(pg_bin, uri, envvars): + """Run libpq_uri_regress for uri with envvars overridden; return result. + + The helper is built but not installed, so it lives on PATH (the meson test + harness prepends the build's test directory). Mirrors the IPC::Run::run call + in the Perl original, chomping a single trailing newline off each stream. + """ + result = pg_bin.run_command(["libpq_uri_regress", uri], extra_env=envvars) + return result + + +def test_001_uri(pg_bin: PgBin): + """Each URI parses to the expected options or fails with the expected error.""" + for entry in _TESTS: + uri, expected_stdout, expected_stderr = entry[0], entry[1], entry[2] + envvars = entry[3] if len(entry) > 3 else {} + result = _run_uri(pg_bin, uri, envvars) + + expected_exit = 0 if expected_stderr == "" else 1 + actual_exit = 0 if result.rc == 0 else 1 + assert actual_exit == expected_exit, "{}: exit status".format(uri) + assert result.stdout == expected_stdout, "{}: stdout".format(uri) + assert result.stderr == expected_stderr, "{}: stderr".format(uri) diff --git a/src/interfaces/libpq/pyt/test_002_api.py b/src/interfaces/libpq/pyt/test_002_api.py new file mode 100644 index 0000000000000..9c8f55e8638ea --- /dev/null +++ b/src/interfaces/libpq/pyt/test_002_api.py @@ -0,0 +1,23 @@ +# Copyright (c) 2022-2026, PostgreSQL Global Development Group + +# pylint: disable=line-too-long,too-many-statements +"""Port of src/interfaces/libpq/t/002_api.pl. 
+ +libpq C API smoke test via the libpq_testclient helper: PQsslAttribute(NULL, "library") returns the SSL library name when built with OpenSSL, otherwise reports SSL is not enabled. +Generated from the Perl original via .agent/gen_golden.py. +""" + +import os + + +def test_002_api(pg_bin): + """libpq PQsslAttribute(NULL, library) behavior with/without OpenSSL.""" + result = pg_bin.run_command(["libpq_testclient", "--ssl"]) + if os.environ.get("with_ssl") == "openssl": + assert ( + result.stdout == "OpenSSL" + ), 'PQsslAttribute(NULL, "library") returns "OpenSSL"' + else: + assert ( + result.stderr == "SSL is not enabled" + ), 'PQsslAttribute(NULL, "library") returns NULL' diff --git a/src/interfaces/libpq/pyt/test_005_negotiate_encryption.py b/src/interfaces/libpq/pyt/test_005_negotiate_encryption.py new file mode 100644 index 0000000000000..31162f8a14cc6 --- /dev/null +++ b/src/interfaces/libpq/pyt/test_005_negotiate_encryption.py @@ -0,0 +1,682 @@ +# Copyright (c) 2021-2026, PostgreSQL Global Development Group + +# pylint: disable=line-too-long +"""Port of src/interfaces/libpq/t/005_negotiate_encryption.pl. + +Tests negotiation of SSL and GSSAPI encryption across the cube of libpq client +options that affect protocol negotiation (gssencmode, sslmode, sslnegotiation), +the server accepting or rejecting authentication via pg_hba.conf, and SSL/GSS +being enabled or disabled in the server. + +The approach is table-driven: each combination is a line listing the options and +an expected outcome (whether the connection succeeds and which encryption it +uses) plus a condensed trace of the negotiation EVENTS, scraped from the server +log. That catches useless retries or wrong-order negotiation even when the final +outcome is unaffected. + +This test is gated on PG_TEST_EXTRA containing "libpq_encryption" (it uses TCP +with trust auth, which is potentially unsafe on multiuser systems). 
The GSSAPI +matrices additionally require a GSSAPI build plus "kerberos" in PG_TEST_EXTRA, +and the SSL matrices require an OpenSSL build. Sections whose prerequisites are +absent are skipped exactly as in the Perl original, so on a build without SSL +and GSS only the plain-negotiation matrix runs. +""" + +import os +import pathlib +import shutil + +import pytest + +import pypg +from pypg.util import slurp_file + +_HOST = "enc-test-localhost.postgresql.example.com" +_HOSTADDR = "127.0.0.1" +_SERVERCIDR = "127.0.0.1/32" + +_ALL_TEST_USERS = ["testuser", "ssluser", "nossluser", "gssuser", "nogssuser"] +_ALL_GSSENCMODES = ["disable", "prefer", "require"] +_ALL_SSLMODES = ["disable", "allow", "prefer", "require"] +_ALL_SSLNEGOTIATIONS = ["postgres", "direct"] + +_CURRENT_ENC_FN = """ +CREATE FUNCTION current_enc() RETURNS text LANGUAGE plpgsql AS $$ +DECLARE + ssl_in_use bool; + gss_in_use bool; +BEGIN + ssl_in_use = (SELECT ssl FROM pg_stat_ssl WHERE pid = pg_backend_pid()); + gss_in_use = (SELECT encrypted FROM pg_stat_gssapi WHERE pid = pg_backend_pid()); + + raise log 'ssl % gss %', ssl_in_use, gss_in_use; + + IF ssl_in_use AND gss_in_use THEN + RETURN 'ssl+gss'; -- shouldn't happen + ELSIF ssl_in_use THEN + RETURN 'ssl'; + ELSIF gss_in_use THEN + RETURN 'gss'; + ELSE + RETURN 'plain'; + END IF; +END; +$$; +""" + + +class _Env: + """Shared per-run state: the node, host, and SSL/GSS capability flags.""" + + def __init__(self, node, ssl_supported, gss_supported, injection_points, unixdir): + self.node = node + self.ssl_supported = ssl_supported + self.gss_supported = gss_supported + self.injection_points = injection_points + self.unixdir = unixdir + self.failures = [] + + +def _parse_log_events(log_contents): + """Scrape the server log for the negotiation events of the test tables. + + Mirrors parse_log_events in the Perl original. Returns the ordered list of + event tokens, or ["-"] when no events are present. 
+ """ + events = [] + for line in log_contents.split("\n"): + if "connection received" in line: + events.append("reconnect" if events else "connect") + if "SSLRequest accepted" in line: + events.append("sslaccept") + if "SSLRequest rejected" in line: + events.append("sslreject") + if "direct SSL connection accepted" in line: + events.append("directsslaccept") + if "direct SSL connection rejected" in line: + events.append("directsslreject") + if "GSSENCRequest accepted" in line: + events.append("gssaccept") + if "GSSENCRequest rejected" in line: + events.append("gssreject") + if "no pg_hba.conf entry" in line: + events.append("authfail") + if "connection authenticated" in line: + events.append("authok") + if "error triggered for injection point backend-" in line: + events.append("backenderror") + if "protocol version 2 error triggered" in line: + events.append("v2error") + if not events: + events.append("-") + return events + + +def _expand_expected_line(user, gssencmode, sslmode, sslnegotiation, expected): + """Expand any '*' wildcard fields into all of their possible values. + + Mirrors expand_expected_line: returns a dict mapping + "user gssencmode sslmode sslnegotiation" to the expected events-and-outcome. 
+ """ + result = {} + if user == "*": + for value in _ALL_TEST_USERS: + result.update( + _expand_expected_line( + value, gssencmode, sslmode, sslnegotiation, expected + ) + ) + elif gssencmode == "*": + for value in _ALL_GSSENCMODES: + result.update( + _expand_expected_line(user, value, sslmode, sslnegotiation, expected) + ) + elif sslmode == "*": + for value in _ALL_SSLMODES: + result.update( + _expand_expected_line(user, gssencmode, value, sslnegotiation, expected) + ) + elif sslnegotiation == "*": + for value in _ALL_SSLNEGOTIATIONS: + result.update( + _expand_expected_line(user, gssencmode, sslmode, value, expected) + ) + else: + result["{} {} {} {}".format(user, gssencmode, sslmode, sslnegotiation)] = ( + expected + ) + return result + + +def _parse_table(table): + """Parse a test table into a dict of expected events-and-outcomes. + + Mirrors parse_table: trims comments and whitespace, ignores empty lines, and + expands wildcards and the '.' (same-as-previous) shorthand. + """ + expected = {} + user = gssencmode = sslmode = sslnegotiation = None + for raw_line in table.split("\n"): + line = raw_line.split("#", 1)[0].strip() + if line == "": + continue + fields = line.split(None, 4) + if len(fields) != 5: + raise ValueError('could not parse line "{}"'.format(line)) + col_user, col_gss, col_ssl, col_neg, rest = fields + if col_user != ".": + user = col_user + if col_gss != ".": + gssencmode = col_gss + if col_ssl != ".": + sslmode = col_ssl + if col_neg != ".": + sslnegotiation = col_neg + + events_part, outcome = rest.split("->") + events = [e.strip() for e in events_part.split(",")] + events_str = ", ".join(events).rstrip() + events_and_outcome = "{} -> {}".format(events_str, outcome.strip()) + + expected.update( + _expand_expected_line( + user, gssencmode, sslmode, sslnegotiation, events_and_outcome + ) + ) + return expected + + +def _connect_test(env, connstr, expected_events_and_outcome): + """Connect with connstr and verify the negotiation events and 
outcome. + + Mirrors connect_test: runs psql with -c 'SELECT current_enc()', records the + log produced during the attempt, derives the EVENTS from it, and compares + "events -> outcome" against the expectation. A non-zero psql exit yields the + outcome 'fail', otherwise the trimmed stdout (plain/ssl/gss) is the outcome. + """ + node = env.node + test_name = " '{}' -> {}".format(connstr, expected_events_and_outcome) + + connstr_full = "" + if "dbname=" not in connstr: + connstr_full += "dbname=postgres " + if "host=" not in connstr: + connstr_full += "host={} hostaddr={} ".format(_HOST, _HOSTADDR) + connstr_full += connstr + + log_location = node.current_log_position() + + result = node.psql_capture( + "", + connstr=connstr_full, + on_error_stop=False, + extra_params=["--no-password", "--command", "SELECT current_enc()"], + ) + outcome = result.stdout if result.rc == 0 else "fail" + + log_contents = slurp_file(node.log, log_location) + events = _parse_log_events(log_contents) + events_and_outcome = ", ".join(events) + " -> {}".format(outcome) + + if events_and_outcome != expected_events_and_outcome: + env.failures.append( + "{}: got {!r} stderr {!r}".format( + test_name, events_and_outcome, result.stderr + ) + ) + + +def _test_matrix(env, test_users, gssencmodes, sslmodes, sslnegotiations, expected): + """Test the cube of user x gssencmode x sslmode x sslnegotiation. + + Mirrors test_matrix: missing table entries are reported as a missing line. 
+ """ + for test_user in test_users: + for gssencmode in gssencmodes: + for client_mode in sslmodes: + for negotiation in sslnegotiations: + key = "{} {} {} {}".format( + test_user, gssencmode, client_mode, negotiation + ) + expected_events = expected.get( + key, "" + ) + _connect_test( + env, + "user={} gssencmode={} sslmode={} sslnegotiation={}".format( + test_user, gssencmode, client_mode, negotiation + ), + expected_events, + ) + + +def _setup_server(create_pg, ssl_supported, gss_supported): + """Initialise and start the test server (SSL/GSS not yet enabled). + + Mirrors the server preparation: TCP listener on the loopback, connection- + negotiation logging, the users, current_enc(), and the narrow pg_hba.conf. + """ + node = create_pg("node", start=False) + node.append_conf( + "listen_addresses = '{}'\n" + "log_connections = 'receipt,authentication,authorization'\n" + "log_disconnections = on\n" + "trace_connection_negotiation = on\n" + "lc_messages = 'C'\n".format(_HOSTADDR) + ) + pgdata = node.datadir + + if ssl_supported: + # The SSL cert fixtures live under src/test/ssl/ssl. From this file + # (src/interfaces/libpq/pyt/) the repo root is four parents up. 
+ repo_root = pathlib.Path(__file__).resolve().parents[4] + certdir = repo_root / "src" / "test" / "ssl" / "ssl" + shutil.copy(certdir / "server-cn-only.crt", pgdata / "server.crt") + shutil.copy(certdir / "server-cn-only.key", pgdata / "server.key") + os.chmod(pgdata / "server.key", 0o600) + node.append_conf("ssl = off\n") + + node.start() + + injection_points = node.check_extension("injection_points") + + for user in ( + "localuser", + "testuser", + "ssluser", + "nossluser", + "gssuser", + "nogssuser", + ): + node.safe_psql("CREATE USER {};".format(user)) + if injection_points: + node.safe_psql("CREATE EXTENSION injection_points;") + + node.safe_psql(_CURRENT_ENC_FN) + + # Capture the socket directory now, while the default trust-all pg_hba.conf + # still admits the OS user; the narrow rules written below would reject it. + unixdir = node.safe_psql("SHOW unix_socket_directories;") + + hba = ( + "\n" + "# TYPE DATABASE USER ADDRESS METHOD OPTIONS\n" + "local postgres localuser trust\n" + "host postgres testuser {cidr} trust\n" + "hostnossl postgres nossluser {cidr} trust\n" + "hostnogssenc postgres nogssuser {cidr} trust\n" + ).format(cidr=_SERVERCIDR) + if ssl_supported: + hba += ( + "\nhostssl postgres ssluser {cidr} trust\n" + ).format(cidr=_SERVERCIDR) + if gss_supported: + hba += ( + "\nhostgssenc postgres gssuser {cidr} trust\n" + ).format(cidr=_SERVERCIDR) + with open(pgdata / "pg_hba.conf", "w", encoding="utf-8") as fh: + fh.write(hba) + node.reload() + + return node, injection_points, unixdir + + +def _table_ssl_gss_disabled(ssl_supported): + """The expected table for SSL and GSS both disabled in the server.""" + if ssl_supported: + table = """ +# USER GSSENCMODE SSLMODE SSLNEGOTIATION EVENTS -> OUTCOME +testuser disable disable postgres connect, authok -> plain +. . allow postgres connect, authok -> plain +. . prefer postgres connect, sslreject, authok -> plain +. . require postgres connect, sslreject -> fail +. . . 
direct connect, directsslreject -> fail +. prefer disable postgres connect, authok -> plain +. . allow postgres connect, authok -> plain +. . prefer postgres connect, sslreject, authok -> plain +. . require postgres connect, sslreject -> fail +. . . direct connect, directsslreject -> fail + +# sslnegotiation=direct is not accepted unless sslmode=require or stronger +* * disable direct - -> fail +* * allow direct - -> fail +* * prefer direct - -> fail +""" + else: + table = """ +# USER GSSENCMODE SSLMODE SSLNEGOTIATION EVENTS -> OUTCOME +testuser disable disable postgres connect, authok -> plain +. . allow postgres connect, authok -> plain +. . prefer postgres connect, authok -> plain +. prefer disable postgres connect, authok -> plain +. . allow postgres connect, authok -> plain +. . prefer postgres connect, authok -> plain + +# Without SSL support, sslmode=require and sslnegotiation=direct are +# not accepted at all +* * require * - -> fail +* * * direct - -> fail + """ + # All attempts with gssencmode=require fail without connecting because no + # credential cache has been configured (or GSS is not compiled in). + table += """ +testuser require * * - -> fail +""" + return table + + +def _table_ssl_enabled(): + """The expected table for SSL enabled and GSS disabled in the server.""" + return """ +# USER GSSENCMODE SSLMODE SSLNEGOTIATION EVENTS -> OUTCOME +testuser disable disable postgres connect, authok -> plain +. . allow postgres connect, authok -> plain +. . prefer postgres connect, sslaccept, authok -> ssl +. . require postgres connect, sslaccept, authok -> ssl +. . . direct connect, directsslaccept, authok -> ssl +ssluser . disable postgres connect, authfail -> fail +. . allow postgres connect, authfail, reconnect, sslaccept, authok -> ssl +. . prefer postgres connect, sslaccept, authok -> ssl +. . require postgres connect, sslaccept, authok -> ssl +. . . direct connect, directsslaccept, authok -> ssl +nossluser . disable postgres connect, authok -> plain +. 
. allow postgres connect, authok -> plain +. . prefer postgres connect, sslaccept, authfail, reconnect, authok -> plain +. . require postgres connect, sslaccept, authfail -> fail +. . require direct connect, directsslaccept, authfail -> fail + +# sslnegotiation=direct is not accepted unless sslmode=require or stronger +* * disable direct - -> fail +* * allow direct - -> fail +* * prefer direct - -> fail +""" + + +def _table_gss_enabled(ssl_supported): + """The expected table for GSS enabled and SSL disabled in the server.""" + base = """ +# USER GSSENCMODE SSLMODE SSLNEGOTIATION EVENTS -> OUTCOME +testuser disable disable postgres connect, authok -> plain +. . allow postgres connect, authok -> plain +. . prefer postgres connect, sslreject, authok -> plain +. . require postgres connect, sslreject -> fail +. . . direct connect, directsslreject -> fail +. prefer * postgres connect, gssaccept, authok -> gss +. prefer require direct connect, gssaccept, authok -> gss +. require * postgres connect, gssaccept, authok -> gss +. . require direct connect, gssaccept, authok -> gss + +gssuser disable disable postgres connect, authfail -> fail +. . allow postgres connect, authfail, reconnect, sslreject -> fail +. . prefer postgres connect, sslreject, authfail -> fail +. . require postgres connect, sslreject -> fail +. . . direct connect, directsslreject -> fail +. prefer * postgres connect, gssaccept, authok -> gss +. prefer require direct connect, gssaccept, authok -> gss +. require * postgres connect, gssaccept, authok -> gss +. . require direct connect, gssaccept, authok -> gss + +nogssuser disable disable postgres connect, authok -> plain +. . allow postgres connect, authok -> plain +. . prefer postgres connect, sslreject, authok -> plain +. . require postgres connect, sslreject -> fail +. . . direct connect, directsslreject -> fail +. prefer disable postgres connect, gssaccept, authfail, reconnect, authok -> plain +. . 
allow postgres connect, gssaccept, authfail, reconnect, authok -> plain +. . prefer postgres connect, gssaccept, authfail, reconnect, sslreject, authok -> plain +. . require postgres connect, gssaccept, authfail, reconnect, sslreject -> fail +. . . direct connect, gssaccept, authfail, reconnect, directsslreject -> fail +. require disable postgres connect, gssaccept, authfail -> fail +. . allow postgres connect, gssaccept, authfail -> fail +. . prefer postgres connect, gssaccept, authfail -> fail +. . require postgres connect, gssaccept, authfail -> fail # If both GSSAPI and sslmode are required, and GSS is not available -> fail +. . . direct connect, gssaccept, authfail -> fail # If both GSSAPI and sslmode are required, and GSS is not available -> fail + +# sslnegotiation=direct is not accepted unless sslmode=require or stronger +* * disable direct - -> fail +* * allow direct - -> fail +* * prefer direct - -> fail + """ + if ssl_supported: + sslmodes, sslnegotiations = _ALL_SSLMODES, _ALL_SSLNEGOTIATIONS + else: + sslmodes, sslnegotiations = ["disable"], ["postgres"] + return base, sslmodes, sslnegotiations + + +def _table_ssl_and_gss_enabled(): + """The expected table for both GSS and SSL enabled in the server.""" + return """ +# USER GSSENCMODE SSLMODE SSLNEGOTIATION EVENTS -> OUTCOME +testuser disable disable postgres connect, authok -> plain +. . allow postgres connect, authok -> plain +. . prefer postgres connect, sslaccept, authok -> ssl +. . require postgres connect, sslaccept, authok -> ssl +. . . direct connect, directsslaccept, authok -> ssl +. prefer disable postgres connect, gssaccept, authok -> gss +. . allow postgres connect, gssaccept, authok -> gss +. . prefer postgres connect, gssaccept, authok -> gss +. . require postgres connect, gssaccept, authok -> gss # If both GSS and SSL is possible, GSS is chosen over SSL, even if sslmode=require +. . . direct connect, gssaccept, authok -> gss +. require disable postgres connect, gssaccept, authok -> gss +. 
. allow postgres connect, gssaccept, authok -> gss +. . prefer postgres connect, gssaccept, authok -> gss +. . require postgres connect, gssaccept, authok -> gss # If both GSS and SSL is possible, GSS is chosen over SSL, even if sslmode=require +. . . direct connect, gssaccept, authok -> gss + +gssuser disable disable postgres connect, authfail -> fail +. . allow postgres connect, authfail, reconnect, sslaccept, authfail -> fail +. . prefer postgres connect, sslaccept, authfail, reconnect, authfail -> fail +. . require postgres connect, sslaccept, authfail -> fail +. . . direct connect, directsslaccept, authfail -> fail +. prefer disable postgres connect, gssaccept, authok -> gss +. . allow postgres connect, gssaccept, authok -> gss +. . prefer postgres connect, gssaccept, authok -> gss +. . require postgres connect, gssaccept, authok -> gss # GSS is chosen over SSL, even though sslmode=require +. . . direct connect, gssaccept, authok -> gss +. require disable postgres connect, gssaccept, authok -> gss +. . allow postgres connect, gssaccept, authok -> gss +. . prefer postgres connect, gssaccept, authok -> gss +. . require postgres connect, gssaccept, authok -> gss # If both GSS and SSL is possible, GSS is chosen over SSL, even if sslmode=require +. . . direct connect, gssaccept, authok -> gss + +ssluser disable disable postgres connect, authfail -> fail +. . allow postgres connect, authfail, reconnect, sslaccept, authok -> ssl +. . prefer postgres connect, sslaccept, authok -> ssl +. . require postgres connect, sslaccept, authok -> ssl +. . . direct connect, directsslaccept, authok -> ssl +. prefer disable postgres connect, gssaccept, authfail, reconnect, authfail -> fail +. . allow postgres connect, gssaccept, authfail, reconnect, authfail, reconnect, sslaccept, authok -> ssl +. . prefer postgres connect, gssaccept, authfail, reconnect, sslaccept, authok -> ssl +. . require postgres connect, gssaccept, authfail, reconnect, sslaccept, authok -> ssl +. . . 
direct connect, gssaccept, authfail, reconnect, directsslaccept, authok -> ssl +. require disable postgres connect, gssaccept, authfail -> fail +. . allow postgres connect, gssaccept, authfail -> fail +. . prefer postgres connect, gssaccept, authfail -> fail +. . require postgres connect, gssaccept, authfail -> fail # If both GSS and SSL are required, the sslmode=require is effectively ignored and GSS is required +. . . direct connect, gssaccept, authfail -> fail + +nogssuser disable disable postgres connect, authok -> plain +. . allow postgres connect, authok -> plain +. . prefer postgres connect, sslaccept, authok -> ssl +. . require postgres connect, sslaccept, authok -> ssl +. . . direct connect, directsslaccept, authok -> ssl +. prefer disable postgres connect, gssaccept, authfail, reconnect, authok -> plain +. . allow postgres connect, gssaccept, authfail, reconnect, authok -> plain +. . prefer postgres connect, gssaccept, authfail, reconnect, sslaccept, authok -> ssl +. . require postgres connect, gssaccept, authfail, reconnect, sslaccept, authok -> ssl +. . . direct connect, gssaccept, authfail, reconnect, directsslaccept, authok -> ssl +. require disable postgres connect, gssaccept, authfail -> fail +. . allow postgres connect, gssaccept, authfail -> fail +. . prefer postgres connect, gssaccept, authfail -> fail +. . require postgres connect, gssaccept, authfail -> fail # If both GSS and SSL are required, the sslmode=require is effectively ignored and GSS is required +. . . direct connect, gssaccept, authfail -> fail + +nossluser disable disable postgres connect, authok -> plain +. . allow postgres connect, authok -> plain +. . prefer postgres connect, sslaccept, authfail, reconnect, authok -> plain +. . require postgres connect, sslaccept, authfail -> fail +. . . direct connect, directsslaccept, authfail -> fail +. prefer * postgres connect, gssaccept, authok -> gss +. . require direct connect, gssaccept, authok -> gss +. 
require * postgres connect, gssaccept, authok -> gss +. . require direct connect, gssaccept, authok -> gss + +# sslnegotiation=direct is not accepted unless sslmode=require or stronger +* * disable direct - -> fail +* * allow direct - -> fail +* * prefer direct - -> fail + """ + + +def _run_injection_ssl(env, unixdir): + """Injection-point error scenarios with SSL enabled in the server.""" + node = env.node + node.safe_psql( + "SELECT injection_points_attach('backend-initialize', 'error');", + connstr="user=localuser host={}".format(unixdir), + ) + _connect_test(env, "user=testuser sslmode=prefer", "connect, backenderror -> fail") + node.restart() + + node.safe_psql( + "SELECT injection_points_attach('backend-initialize-v2-error', 'error');", + connstr="user=localuser host={}".format(unixdir), + ) + _connect_test(env, "user=testuser sslmode=prefer", "connect, v2error -> fail") + node.restart() + + node.safe_psql( + "SELECT injection_points_attach('backend-ssl-startup', 'error');", + connstr="user=localuser host={}".format(unixdir), + ) + _connect_test( + env, + "user=testuser sslmode=prefer", + "connect, sslaccept, backenderror, reconnect, authok -> plain", + ) + node.restart() + + +def _run_injection_gss(env, unixdir): + """Injection-point error scenarios with GSS enabled in the server.""" + node = env.node + node.safe_psql( + "SELECT injection_points_attach('backend-initialize', 'error');", + connstr="user=localuser host={}".format(unixdir), + ) + _connect_test( + env, + "user=testuser gssencmode=prefer sslmode=disable", + "connect, backenderror, reconnect, backenderror -> fail", + ) + node.restart() + + node.safe_psql( + "SELECT injection_points_attach('backend-initialize-v2-error', 'error');", + connstr="user=localuser host={}".format(unixdir), + ) + _connect_test( + env, + "user=testuser gssencmode=prefer sslmode=disable", + "connect, v2error, reconnect, v2error -> fail", + ) + node.restart() + + node.safe_psql( + "SELECT 
injection_points_attach('backend-gssapi-startup', 'error');", + connstr="user=localuser host={}".format(unixdir), + ) + _connect_test( + env, + "user=testuser gssencmode=prefer sslmode=disable", + "connect, gssaccept, backenderror, reconnect, authok -> plain", + ) + node.restart() + + +def _run_ssl_section(env, kerberos_enabled): + """Tests with GSS disabled and SSL enabled in the server.""" + if not env.ssl_supported: + return + node = env.node + table = _table_ssl_enabled() + + node.adjust_conf("ssl", "on") + node.reload() + + _test_matrix( + env, + ["testuser", "ssluser", "nossluser"], + ["disable"], + _ALL_SSLMODES, + _ALL_SSLNEGOTIATIONS, + _parse_table(table), + ) + + if env.injection_points: + _run_injection_ssl(env, env.unixdir) + + node.adjust_conf("ssl", "off") + node.reload() + _ = kerberos_enabled + + +def _run_unix_section(env): + """Negotiation over Unix-domain sockets (no SSL or GSSAPI attempted).""" + unixdir = env.unixdir + if unixdir == "": + return + _connect_test( + env, + "user=localuser gssencmode=prefer sslmode=prefer host={}".format(unixdir), + "connect, authok -> plain", + ) + _connect_test( + env, + "user=localuser gssencmode=require sslmode=prefer host={}".format(unixdir), + "- -> fail", + ) + + +def test_005_negotiate_encryption(create_pg): + """SSL/GSS encryption negotiation across the full client-option matrix.""" + pypg.skip_unless_test_extras("libpq_encryption") + + gss_supported = os.environ.get("with_gssapi") == "yes" + kerberos_enabled = "kerberos" in os.environ.get("PG_TEST_EXTRA", "").split() + ssl_supported = os.environ.get("with_ssl") == "openssl" + + node, injection_points, unixdir = _setup_server( + create_pg, ssl_supported, gss_supported + ) + env = _Env(node, ssl_supported, gss_supported, injection_points, unixdir) + + # Run tests with GSS and SSL disabled in the server. 
+ table = _table_ssl_gss_disabled(ssl_supported) + _test_matrix( + env, + ["testuser"], + _ALL_GSSENCMODES, + _ALL_SSLMODES, + _ALL_SSLNEGOTIATIONS, + _parse_table(table), + ) + + _run_ssl_section(env, kerberos_enabled) + + # The GSSAPI sections require a GSSAPI build and kerberos in PG_TEST_EXTRA, + # and rely on PostgreSQL::Test::Kerberos to provision principals/tickets. + # That Kerberos test infrastructure has no pypg equivalent yet, so when GSS + # would otherwise run we surface that as a skip rather than silently passing. + if gss_supported and kerberos_enabled: + pytest.skip( + "GSSAPI/Kerberos sections require PostgreSQL::Test::Kerberos, " + "which has no pypg port yet" + ) + + _run_unix_section(env) + + node.teardown_node() + + assert not env.failures, "negotiation mismatches:\n" + "\n".join(env.failures) diff --git a/src/interfaces/libpq/pyt/test_006_service.py b/src/interfaces/libpq/pyt/test_006_service.py new file mode 100644 index 0000000000000..69dd912a85c73 --- /dev/null +++ b/src/interfaces/libpq/pyt/test_006_service.py @@ -0,0 +1,330 @@ +# Copyright (c) 2025-2026, PostgreSQL Global Development Group + +"""Port of src/interfaces/libpq/t/006_service.pl. + +Tests connection scenarios driven by the service name and the service file, +covering the "service"/"servicefile" connection options and their environment +variables (PGSERVICE, PGSERVICEFILE, PGSYSCONFDIR), including nested-directive +rejection and the precedence of the servicefile option over PGSERVICEFILE. + +A real server ("node") provides the working host/port, written into a service +file. A second, never-started "dummy_node" is used for the connection attempts: +that way the environment variables used for the connection do not interfere with +the connection attempts, and the service file's contents are exercised instead. 
+""" + +import os +import re +import shutil + + +def _append(path, text): + with open(path, "a", encoding="utf-8") as fh: + fh.write(text) + + +def _connect_ok(dummy_node, env_overrides, connstr, name, sql, expected_stdout): + """connect_ok with PGSERVICE*/PGSYSCONFDIR temporarily applied to os.environ.""" + saved = {k: os.environ.get(k) for k in env_overrides} + try: + _apply_env(env_overrides) + dummy_node.connect_ok(connstr, name, sql=sql, expected_stdout=expected_stdout) + finally: + _restore_env(saved) + + +def _connect_fails(dummy_node, env_overrides, connstr, name, expected_stderr): + """connect_fails with PGSERVICE*/PGSYSCONFDIR temporarily applied.""" + saved = {k: os.environ.get(k) for k in env_overrides} + try: + _apply_env(env_overrides) + dummy_node.connect_fails(connstr, name, expected_stderr=expected_stderr) + finally: + _restore_env(saved) + + +def _apply_env(env_overrides): + for key, value in env_overrides.items(): + if value is None: + os.environ.pop(key, None) + else: + os.environ[key] = value + + +def _restore_env(saved): + for key, value in saved.items(): + if value is None: + os.environ.pop(key, None) + else: + os.environ[key] = value + + +def _build_service_files(td, node): + """Create the set of service files used by the tests; return their paths.""" + # File that includes a valid service name, using a decomposed connection + # string for its contents, split on spaces. + srvfile_valid = os.path.join(td, "pg_service_valid.conf") + _append(srvfile_valid, "[my_srv]\n") + for param in re.split(r"\s+", node.connstr()): + if param: + _append(srvfile_valid, param + "\n") + + # File defined with no contents, used as default value for PGSERVICEFILE so + # that no lookup is attempted in the user's home directory. + srvfile_empty = os.path.join(td, "pg_service_empty.conf") + _append(srvfile_empty, "") + + # Default service file in PGSYSCONFDIR. + srvfile_default = os.path.join(td, "pg_service.conf") + + # Missing service file. 
+ srvfile_missing = os.path.join(td, "pg_service_missing.conf") + + # Service file with nested "service" defined. + srvfile_nested = os.path.join(td, "pg_service_nested.conf") + shutil.copy(srvfile_valid, srvfile_nested) + _append(srvfile_nested, "service=invalid_srv\n") + + # Service file with nested "servicefile" defined. + srvfile_nested_2 = os.path.join(td, "pg_service_nested_2.conf") + shutil.copy(srvfile_valid, srvfile_nested_2) + _append(srvfile_nested_2, "servicefile=" + srvfile_default + "\n") + + return { + "valid": srvfile_valid, + "empty": srvfile_empty, + "default": srvfile_default, + "missing": srvfile_missing, + "nested": srvfile_nested, + "nested_2": srvfile_nested_2, + } + + +def _test_valid_service_file(dummy_node, td, files): + """Checks combinations of service name and a valid service file.""" + base = {"PGSYSCONFDIR": td, "PGSERVICEFILE": files["valid"]} + _connect_ok( + dummy_node, + base, + "service=my_srv", + 'connection with correct "service" string and PGSERVICEFILE', + "SELECT 'connect1_1'", + r"connect1_1", + ) + _connect_ok( + dummy_node, + base, + "postgres://?service=my_srv", + 'connection with correct "service" URI and PGSERVICEFILE', + "SELECT 'connect1_2'", + r"connect1_2", + ) + _connect_fails( + dummy_node, + base, + "service=undefined-service", + 'connection with incorrect "service" string and PGSERVICEFILE', + r'definition of service "undefined-service" not found', + ) + _connect_ok( + dummy_node, + {**base, "PGSERVICE": "my_srv"}, + "", + "connection with correct PGSERVICE and PGSERVICEFILE", + "SELECT 'connect1_3'", + r"connect1_3", + ) + # The Perl original uses expected_stdout here even though the message goes + # to stderr; connect_fails only asserts the non-zero exit in that case, so + # match the same observable behaviour by checking the failure alone. 
+ _connect_fails( + dummy_node, + {**base, "PGSERVICE": "undefined-service"}, + "", + "connection with incorrect PGSERVICE and PGSERVICEFILE", + None, + ) + + +def _test_missing_service_file(dummy_node, td, files): + """Checks case of an incorrect (missing) service file.""" + _connect_fails( + dummy_node, + {"PGSYSCONFDIR": td, "PGSERVICEFILE": files["missing"]}, + "service=my_srv", + 'connection with correct "service" string and incorrect PGSERVICEFILE', + r'service file ".*pg_service_missing.conf" not found', + ) + + +def _test_default_service_file(dummy_node, td, files): + """Checks the service file named "pg_service.conf" in PGSYSCONFDIR.""" + srvfile_default = os.path.join(td, "pg_service.conf") + shutil.copy(files["valid"], srvfile_default) + base = {"PGSYSCONFDIR": td, "PGSERVICEFILE": files["empty"]} + + _connect_ok( + dummy_node, + base, + "service=my_srv", + 'connection with correct "service" string and pg_service.conf', + "SELECT 'connect2_1'", + r"connect2_1", + ) + _connect_ok( + dummy_node, + base, + "postgres://?service=my_srv", + 'connection with correct "service" URI and default pg_service.conf', + "SELECT 'connect2_2'", + r"connect2_2", + ) + _connect_fails( + dummy_node, + base, + "service=undefined-service", + 'connection with incorrect "service" string and default pg_service.conf', + r'definition of service "undefined-service" not found', + ) + _connect_ok( + dummy_node, + {**base, "PGSERVICE": "my_srv"}, + "", + "connection with correct PGSERVICE and default pg_service.conf", + "SELECT 'connect2_3'", + r"connect2_3", + ) + _connect_ok( + dummy_node, + base, + "service=my_srv servicefile='{}'".format(files["empty"]), + "SERVICEFILE updated when service is found in default pg_service.conf", + r"\echo :SERVICEFILE", + r"^{}$".format(re.escape(srvfile_default)), + ) + _connect_fails( + dummy_node, + {**base, "PGSERVICE": "undefined-service"}, + "", + "connection with incorrect PGSERVICE and default pg_service.conf", + None, + ) + # Remove default 
pg_service.conf. + os.unlink(srvfile_default) + + +def _test_nested_service_file(dummy_node, td, files): + """Checks nested service file contents are rejected.""" + _connect_fails( + dummy_node, + {"PGSYSCONFDIR": td, "PGSERVICEFILE": files["nested"]}, + "service=my_srv", + 'connection with "service" in nested service file', + r'nested "service" specifications not supported in service file', + ) + _connect_fails( + dummy_node, + {"PGSYSCONFDIR": td, "PGSERVICEFILE": files["nested_2"]}, + "service=my_srv", + 'connection with "servicefile" in nested service file', + r'nested "servicefile" specifications not supported in service file', + ) + + +def _test_servicefile_option(dummy_node, td, files): + """Checks that the "servicefile" option works as expected.""" + base = {"PGSYSCONFDIR": td, "PGSERVICEFILE": files["empty"]} + srvfile = files["valid"] + + # Encode slashes and backslashes for the URI form. + encoded_srvfile = re.sub( + r"[\\/]", lambda m: "%2F" if m.group(0) == "/" else "%5C", srvfile + ) + # Additionally encode a colon (Windows servicefile paths). 
+ encoded_srvfile = encoded_srvfile.replace(":", "%3A") + + _connect_ok( + dummy_node, + base, + "service=my_srv servicefile='{}'".format(srvfile), + "connection with valid servicefile in connection string", + "SELECT 'connect3_1'", + r"connect3_1", + ) + _connect_ok( + dummy_node, + base, + "postgresql:///?service=my_srv&servicefile=" + encoded_srvfile, + "connection with valid servicefile in URI", + "SELECT 'connect3_2'", + r"connect3_2", + ) + _connect_ok( + dummy_node, + {**base, "PGSERVICE": "my_srv"}, + "servicefile='{}'".format(srvfile), + "connection with PGSERVICE and servicefile in connection string", + "SELECT 'connect3_3'", + r"connect3_3", + ) + _connect_ok( + dummy_node, + {**base, "PGSERVICE": "my_srv"}, + "postgresql://?servicefile=" + encoded_srvfile, + "connection with PGSERVICE and servicefile in URI", + "SELECT 'connect3_4'", + r"connect3_4", + ) + + +def _test_servicefile_priority(dummy_node, td, files): + """servicefile option takes priority over the PGSERVICEFILE env var.""" + srvfile = files["valid"] + _connect_fails( + dummy_node, + {"PGSYSCONFDIR": td, "PGSERVICEFILE": "non-existent-file.conf"}, + "service=my_srv", + "connection with invalid PGSERVICEFILE", + r'service file "non-existent-file\.conf" not found', + ) + _connect_ok( + dummy_node, + {"PGSYSCONFDIR": td, "PGSERVICEFILE": "non-existent-file.conf"}, + "service=my_srv servicefile='{}'".format(srvfile), + "connection with both servicefile and PGSERVICEFILE", + "SELECT 'connect4_1'", + r"connect4_1", + ) + + +def test_006_service(create_pg, tmp_path): + """Service name and service file connection scenarios.""" + node = create_pg("node") + + # Set up a dummy node used for the connection tests, but do not start it. + # This ensures the environment variables used for the connection do not + # interfere with the connection attempts, and the service file's contents + # are used. 
+ dummy_node = create_pg("dummy_node", start=False) + + td = str(tmp_path) + files = _build_service_files(td, node) + + # PGSYSCONFDIR is used if the service file defined in PGSERVICEFILE cannot + # be found, or when a service file is found but not the service name. + # PGSERVICEFILE is forced to a default location so this test never looks at + # a home directory. + saved = {k: os.environ.get(k) for k in ("PGSYSCONFDIR", "PGSERVICEFILE")} + os.environ["PGSYSCONFDIR"] = td + os.environ["PGSERVICEFILE"] = files["empty"] + try: + _test_valid_service_file(dummy_node, td, files) + _test_missing_service_file(dummy_node, td, files) + _test_default_service_file(dummy_node, td, files) + _test_nested_service_file(dummy_node, td, files) + _test_servicefile_option(dummy_node, td, files) + _test_servicefile_priority(dummy_node, td, files) + finally: + _restore_env(saved) + + node.teardown_node() diff --git a/src/interfaces/libpq/pyt/test_load_balance.py b/src/interfaces/libpq/pyt/test_load_balance.py index 0af46d8f37ded..3804814738950 100644 --- a/src/interfaces/libpq/pyt/test_load_balance.py +++ b/src/interfaces/libpq/pyt/test_load_balance.py @@ -78,7 +78,7 @@ def load_balance_nodes_dns(create_pg_module): hosts_path = "/etc/hosts" try: - with open(hosts_path) as f: + with open(hosts_path, encoding="utf-8") as f: hosts_content = f.read() except (OSError, IOError): pytest.skip(f"Could not read hosts file: {hosts_path}") @@ -97,9 +97,9 @@ def load_balance_nodes_dns(create_pg_module): # Allow trust authentication for TCP connections from loopback for node in nodes: hba_path = node.datadir / "pg_hba.conf" - with open(hba_path, "r") as f: + with open(hba_path, "r", encoding="utf-8") as f: original_content = f.read() - with open(hba_path, "w") as f: + with open(hba_path, "w", encoding="utf-8") as f: f.write("host all all 127.0.0.0/8 trust\n") f.write(original_content) node.pg_ctl("reload") diff --git a/src/interfaces/libpq/t/003_load_balance_host_list.pl 
b/src/interfaces/libpq/t/003_load_balance_host_list.pl new file mode 100644 index 0000000000000..1f970ff994b51 --- /dev/null +++ b/src/interfaces/libpq/t/003_load_balance_host_list.pl @@ -0,0 +1,94 @@ +# Copyright (c) 2023-2026, PostgreSQL Global Development Group +use strict; +use warnings FATAL => 'all'; +use Config; +use PostgreSQL::Test::Utils; +use PostgreSQL::Test::Cluster; +use Test::More; + +# This tests load balancing across the list of different hosts in the host +# parameter of the connection string. + +# Cluster setup which is shared for testing both load balancing methods +my $node1 = PostgreSQL::Test::Cluster->new('node1'); +my $node2 = PostgreSQL::Test::Cluster->new('node2', own_host => 1); +my $node3 = PostgreSQL::Test::Cluster->new('node3', own_host => 1); + +# Create a data directory with initdb +$node1->init(); +$node2->init(); +$node3->init(); + +# Start the PostgreSQL server +$node1->start(); +$node2->start(); +$node3->start(); + +# Start the tests for load balancing method 1 +my $hostlist = $node1->host . ',' . $node2->host . ',' . $node3->host; +my $portlist = $node1->port . ',' . $node2->port . ',' . $node3->port; + +$node1->connect_fails( + "host=$hostlist port=$portlist load_balance_hosts=doesnotexist", + "load_balance_hosts doesn't accept unknown values", + expected_stderr => qr/invalid load_balance_hosts value: "doesnotexist"/); + +# load_balance_hosts=disable should always choose the first one. +$node1->connect_ok( + "host=$hostlist port=$portlist load_balance_hosts=disable", + "load_balance_hosts=disable connects to the first node", + sql => "SELECT 'connect1'", + log_like => [qr/statement: SELECT 'connect1'/]); + +# Statistically the following loop with load_balance_hosts=random will almost +# certainly connect at least once to each of the nodes. The chance of that not +# happening is so small that it's negligible: (2/3)^50 = 1.56832855e-9 +foreach my $i (1 .. 
50) +{ + $node1->connect_ok( + "host=$hostlist port=$portlist load_balance_hosts=random", + "repeated connections with random load balancing", + sql => "SELECT 'connect2'"); +} + +my $node1_occurrences = () = + $node1->log_content() =~ /statement: SELECT 'connect2'/g; +my $node2_occurrences = () = + $node2->log_content() =~ /statement: SELECT 'connect2'/g; +my $node3_occurrences = () = + $node3->log_content() =~ /statement: SELECT 'connect2'/g; + +my $total_occurrences = + $node1_occurrences + $node2_occurrences + $node3_occurrences; + +cmp_ok($node1_occurrences, '>', 1, + "received at least one connection on node1"); +cmp_ok($node2_occurrences, '>', 1, + "received at least one connection on node2"); +cmp_ok($node3_occurrences, '>', 1, + "received at least one connection on node3"); +is($total_occurrences, 50, "received 50 connections across all nodes"); + +$node1->stop(); +$node2->stop(); + +# load_balance_hosts=disable should continue trying hosts until it finds a +# working one. +$node3->connect_ok( + "host=$hostlist port=$portlist load_balance_hosts=disable", + "load_balance_hosts=disable continues until it connects to the a working node", + sql => "SELECT 'connect3'", + log_like => [qr/statement: SELECT 'connect3'/]); + +# Also with load_balance_hosts=random we continue to the next nodes if previous +# ones are down. Connect a few times to make sure it's not just lucky. +foreach my $i (1 .. 
5) +{ + $node3->connect_ok( + "host=$hostlist port=$portlist load_balance_hosts=random", + "load_balance_hosts=random continues until it connects to the a working node", + sql => "SELECT 'connect4'", + log_like => [qr/statement: SELECT 'connect4'/]); +} + +done_testing(); diff --git a/src/interfaces/libpq/t/004_load_balance_dns.pl b/src/interfaces/libpq/t/004_load_balance_dns.pl new file mode 100644 index 0000000000000..e1ff9a0602480 --- /dev/null +++ b/src/interfaces/libpq/t/004_load_balance_dns.pl @@ -0,0 +1,144 @@ +# Copyright (c) 2023-2026, PostgreSQL Global Development Group +use strict; +use warnings FATAL => 'all'; +use Config; +use PostgreSQL::Test::Utils; +use PostgreSQL::Test::Cluster; +use Test::More; + +if (!$ENV{PG_TEST_EXTRA} || $ENV{PG_TEST_EXTRA} !~ /\bload_balance\b/) +{ + plan skip_all => + 'Potentially unsafe test load_balance not enabled in PG_TEST_EXTRA'; +} + +# This tests loadbalancing based on a DNS entry that contains multiple records +# for different IPs. Since setting up a DNS server is more effort than we +# consider reasonable to run this test, this situation is instead imitated by +# using a hosts file where a single hostname maps to multiple different IP +# addresses. This test requires the administrator to add the following lines to +# the hosts file (if we detect that this hasn't happened we skip the test): +# +# 127.0.0.1 pg-loadbalancetest +# 127.0.0.2 pg-loadbalancetest +# 127.0.0.3 pg-loadbalancetest +# +# Windows or Linux are required to run this test because these OSes allow +# binding to 127.0.0.2 and 127.0.0.3 addresses by default, but other OSes +# don't. We need to bind to different IP addresses, so that we can use these +# different IP addresses in the hosts file. +# +# The hosts file needs to be prepared before running this test. We don't do it +# on the fly, because it requires root permissions to change the hosts file. 
In +# CI we set up the previously mentioned rules in the hosts file, so that this +# load balancing method is tested. + +# Cluster setup which is shared for testing both load balancing methods +my $can_bind_to_127_0_0_2 = + $Config{osname} eq 'linux' || $PostgreSQL::Test::Utils::windows_os; + +# Checks for the requirements for testing load balancing method 2 +if (!$can_bind_to_127_0_0_2) +{ + plan skip_all => 'load_balance test only supported on Linux and Windows'; +} + +my $hosts_path; +if ($windows_os) +{ + $hosts_path = 'c:\Windows\System32\Drivers\etc\hosts'; +} +else +{ + $hosts_path = '/etc/hosts'; +} + +my $hosts_content = PostgreSQL::Test::Utils::slurp_file($hosts_path); + +my $hosts_count = () = + $hosts_content =~ /127\.0\.0\.[1-3] pg-loadbalancetest/g; +if ($hosts_count != 3) +{ + # Host file is not prepared for this test + plan skip_all => "hosts file was not prepared for DNS load balance test"; +} + +$PostgreSQL::Test::Cluster::use_tcp = 1; +$PostgreSQL::Test::Cluster::test_pghost = '127.0.0.1'; +my $port = PostgreSQL::Test::Cluster::get_free_port(); +my $node1 = PostgreSQL::Test::Cluster->new('node1', port => $port); +my $node2 = + PostgreSQL::Test::Cluster->new('node2', port => $port, own_host => 1); +my $node3 = + PostgreSQL::Test::Cluster->new('node3', port => $port, own_host => 1); + +# Create a data directory with initdb +$node1->init(); +$node2->init(); +$node3->init(); + +# Start the PostgreSQL server +$node1->start(); +$node2->start(); +$node3->start(); + +# load_balance_hosts=disable should always choose the first one. +$node1->connect_ok( + "host=pg-loadbalancetest port=$port load_balance_hosts=disable", + "load_balance_hosts=disable connects to the first node", + sql => "SELECT 'connect1'", + log_like => [qr/statement: SELECT 'connect1'/]); + + +# Statistically the following loop with load_balance_hosts=random will almost +# certainly connect at least once to each of the nodes. 
The chance of that not +# happening is so small that it's negligible: (2/3)^50 = 1.56832855e-9 +foreach my $i (1 .. 50) +{ + $node1->connect_ok( + "host=pg-loadbalancetest port=$port load_balance_hosts=random", + "repeated connections with random load balancing", + sql => "SELECT 'connect2'"); +} + +my $node1_occurrences = () = + $node1->log_content() =~ /statement: SELECT 'connect2'/g; +my $node2_occurrences = () = + $node2->log_content() =~ /statement: SELECT 'connect2'/g; +my $node3_occurrences = () = + $node3->log_content() =~ /statement: SELECT 'connect2'/g; + +my $total_occurrences = + $node1_occurrences + $node2_occurrences + $node3_occurrences; + +cmp_ok($node1_occurrences, '>', 1, + "received at least one connection on node1"); +cmp_ok($node2_occurrences, '>', 1, + "received at least one connection on node2"); +cmp_ok($node3_occurrences, '>', 1, + "received at least one connection on node3"); +is($total_occurrences, 50, "received 50 connections across all nodes"); + +$node1->stop(); +$node2->stop(); + +# load_balance_hosts=disable should continue trying hosts until it finds a +# working one. +$node3->connect_ok( + "host=pg-loadbalancetest port=$port load_balance_hosts=disable", + "load_balance_hosts=disable continues until it connects to a working node", + sql => "SELECT 'connect3'", + log_like => [qr/statement: SELECT 'connect3'/]); + +# Also with load_balance_hosts=random we continue to the next nodes if previous +# ones are down. Connect a few times to make sure it's not just lucky. +foreach my $i (1 .. 
5) +{ + $node3->connect_ok( + "host=pg-loadbalancetest port=$port load_balance_hosts=random", + "load_balance_hosts=random continues until it connects to a working node", + sql => "SELECT 'connect4'", + log_like => [qr/statement: SELECT 'connect4'/]); +} + +done_testing(); From 8b5084085154caf178a67c460d53bf1d506565e4 Mon Sep 17 00:00:00 2001 From: Greg Burd Date: Sun, 14 Jun 2026 11:16:35 -0400 Subject: [PATCH 19/36] pytest: port the contrib TAP suites Port the contrib-module TAP suites (amcheck, auto_explain, basebackup_to_shell, bloom, dblink, oid2name, pg_prewarm, pg_stat_statements, pg_visibility, postgres_fdw, sepgsql, test_decoding, vacuumlo, and others) to pytest. Co-authored-by: Greg Burd --- contrib/amcheck/meson.build | 10 + contrib/amcheck/pyt/test_001_verify_heapam.py | 209 ++++++++++++++ contrib/amcheck/pyt/test_002_cic.py | 65 +++++ contrib/amcheck/pyt/test_003_cic_2pc.py | 123 +++++++++ .../pyt/test_004_verify_nbtree_unique.py | 72 +++++ contrib/amcheck/pyt/test_005_pitr.py | 79 ++++++ contrib/amcheck/pyt/test_006_verify_gin.py | 254 ++++++++++++++++++ contrib/auto_explain/meson.build | 5 + .../auto_explain/pyt/test_001_auto_explain.py | 196 ++++++++++++++ contrib/basebackup_to_shell/meson.build | 7 + .../basebackup_to_shell/pyt/test_001_basic.py | 127 +++++++++ contrib/bloom/meson.build | 5 + contrib/bloom/pyt/test_001_wal.py | 61 +++++ contrib/dblink/meson.build | 5 + contrib/dblink/pyt/test_001_auth_scram.py | 209 ++++++++++++++ contrib/oid2name/meson.build | 5 + contrib/oid2name/pyt/test_001_basic.py | 15 ++ contrib/pg_prewarm/meson.build | 5 + contrib/pg_prewarm/pyt/test_001_basic.py | 72 +++++ contrib/pg_stash_advice/meson.build | 5 + .../pg_stash_advice/pyt/test_001_persist.py | 57 ++++ contrib/pg_stat_statements/meson.build | 5 + .../pyt/test_010_restart.py | 42 +++ contrib/pg_visibility/meson.build | 6 + .../pyt/test_001_concurrent_transaction.py | 35 +++ .../pg_visibility/pyt/test_002_corrupt_vm.py | 68 +++++ 
contrib/postgres_fdw/meson.build | 6 + .../postgres_fdw/pyt/test_001_auth_scram.py | 185 +++++++++++++ .../postgres_fdw/pyt/test_010_subscription.py | 73 +++++ contrib/sepgsql/meson.build | 5 + contrib/sepgsql/pyt/test_001_sepgsql.py | 128 +++++++++ contrib/test_decoding/meson.build | 5 + .../test_decoding/pyt/test_001_repl_stats.py | 117 ++++++++ contrib/vacuumlo/meson.build | 5 + contrib/vacuumlo/pyt/test_001_basic.py | 15 ++ 35 files changed, 2281 insertions(+) create mode 100644 contrib/amcheck/pyt/test_001_verify_heapam.py create mode 100644 contrib/amcheck/pyt/test_002_cic.py create mode 100644 contrib/amcheck/pyt/test_003_cic_2pc.py create mode 100644 contrib/amcheck/pyt/test_004_verify_nbtree_unique.py create mode 100644 contrib/amcheck/pyt/test_005_pitr.py create mode 100644 contrib/amcheck/pyt/test_006_verify_gin.py create mode 100644 contrib/auto_explain/pyt/test_001_auto_explain.py create mode 100644 contrib/basebackup_to_shell/pyt/test_001_basic.py create mode 100644 contrib/bloom/pyt/test_001_wal.py create mode 100644 contrib/dblink/pyt/test_001_auth_scram.py create mode 100644 contrib/oid2name/pyt/test_001_basic.py create mode 100644 contrib/pg_prewarm/pyt/test_001_basic.py create mode 100644 contrib/pg_stash_advice/pyt/test_001_persist.py create mode 100644 contrib/pg_stat_statements/pyt/test_010_restart.py create mode 100644 contrib/pg_visibility/pyt/test_001_concurrent_transaction.py create mode 100644 contrib/pg_visibility/pyt/test_002_corrupt_vm.py create mode 100644 contrib/postgres_fdw/pyt/test_001_auth_scram.py create mode 100644 contrib/postgres_fdw/pyt/test_010_subscription.py create mode 100644 contrib/sepgsql/pyt/test_001_sepgsql.py create mode 100644 contrib/test_decoding/pyt/test_001_repl_stats.py create mode 100644 contrib/vacuumlo/pyt/test_001_basic.py diff --git a/contrib/amcheck/meson.build b/contrib/amcheck/meson.build index d5137ef691d61..36a2400a7898f 100644 --- a/contrib/amcheck/meson.build +++ b/contrib/amcheck/meson.build @@ 
-42,6 +42,16 @@ tests += { 'check_heap', ], }, + 'pytest': { + 'tests': [ + 'pyt/test_002_cic.py', + 'pyt/test_003_cic_2pc.py', + 'pyt/test_004_verify_nbtree_unique.py', + 'pyt/test_005_pitr.py', + 'pyt/test_001_verify_heapam.py', + 'pyt/test_006_verify_gin.py', + ], + }, 'tap': { 'tests': [ 't/001_verify_heapam.pl', diff --git a/contrib/amcheck/pyt/test_001_verify_heapam.py b/contrib/amcheck/pyt/test_001_verify_heapam.py new file mode 100644 index 0000000000000..c2c7c979ece0f --- /dev/null +++ b/contrib/amcheck/pyt/test_001_verify_heapam.py @@ -0,0 +1,209 @@ +# Copyright (c) 2021-2026, PostgreSQL Global Development Group + +"""Port of contrib/amcheck/t/001_verify_heapam.pl. + +verify_heapam() on plain heap tables and on sequences (which are heaps +under the hood): an uncorrupted table passes under every option combination, +while a table whose first page's line pointers are corrupted is reliably +detected, including interactions with VACUUM FREEZE and the skip option. +""" + +import os +import re +import struct + +# Line-pointer corruption messages verify_heapam.c emits for the bytes written +# by _corrupt_first_page (chosen to hit the checks on both endiannesses). 
+_HEAP_CORRUPTION_RES = [ + r"line pointer redirection to item at offset \d+ precedes minimum offset \d+", + r"line pointer redirection to item at offset \d+ exceeds maximum offset \d+", + r"line pointer to page offset \d+ is not maximally aligned", + r"line pointer length \d+ is less than the minimum tuple header size \d+", + r"line pointer to page offset \d+ with length \d+ ends beyond maximum " + r"page offset \d+", +] + +_FRESH_TABLE = """\ +DROP TABLE IF EXISTS {rel} CASCADE; +CREATE TABLE {rel} (a integer, b text); +ALTER TABLE {rel} SET (autovacuum_enabled=false); +ALTER TABLE {rel} ALTER b SET STORAGE external; +INSERT INTO {rel} (a, b) + (SELECT gs, repeat('b',gs*10) FROM generate_series(1,1000) gs); +BEGIN; +SAVEPOINT s1; +SELECT 1 FROM {rel} WHERE a = 42 FOR UPDATE; +UPDATE {rel} SET b = b WHERE a = 42; +RELEASE s1; +SAVEPOINT s1; +SELECT 1 FROM {rel} WHERE a = 42 FOR UPDATE; +UPDATE {rel} SET b = b WHERE a = 42; +COMMIT; +""" + +_FRESH_SEQUENCE = """\ +DROP SEQUENCE IF EXISTS {seq} CASCADE; +CREATE SEQUENCE {seq} + INCREMENT BY 13 + MINVALUE 17 + START WITH 23; +SELECT nextval('{seq}'); +SELECT setval('{seq}', currval('{seq}') + nextval('{seq}')); +""" + + +def _relation_filepath(node, relname): + """Return the absolute on-disk path of a relation's main fork.""" + rel = node.safe_psql("SELECT pg_relation_filepath('{}')".format(relname)) + assert rel, "path not found for relation {}".format(relname) + return os.path.join(node.datadir, rel) + + +def _fresh_test_table(node, relname): + """(Re)create and populate a test table of the given name.""" + node.safe_psql(_FRESH_TABLE.format(rel=relname)) + + +def _fresh_test_sequence(node, seqname): + """Create and exercise a test sequence of the given name.""" + node.safe_psql(_FRESH_SEQUENCE.format(seq=seqname)) + + +def _corrupt_first_page(node, relname): + """Stop the node, corrupt the first page's line pointers, restart it.""" + relpath = _relation_filepath(node, relname) + node.stop() + # Corrupt some line 
pointers. The values are chosen to hit the various + # line-pointer-corruption checks in verify_heapam.c on both little-endian + # and big-endian architectures (Perl pack("L*", ...) is native unsigned + # 32-bit). + payload = struct.pack( + "=6I", + 0xAAA15550, + 0xAAA0D550, + 0x00010000, + 0x00008000, + 0x0000800F, + 0x001E8000, + ) + with open(relpath, "r+b") as fh: + fh.seek(32) + fh.write(payload) + node.start() + + +def _detects_corruption(node, function, testname, regexes): + """Assert verify_heapam(...) output matches all of the given regexes.""" + result = node.safe_psql("SELECT * FROM {}".format(function)) + for pattern in regexes: + assert re.search(pattern, result), "{}\noutput:\n{}".format(testname, result) + + +def _detects_heap_corruption(node, function, testname): + """Assert verify_heapam(...) reports the expected line-pointer messages.""" + _detects_corruption(node, function, testname, _HEAP_CORRUPTION_RES) + + +def _detects_no_corruption(node, function, testname): + """Assert verify_heapam(...) reports no corruption (empty output).""" + result = node.safe_psql("SELECT * FROM {}".format(function)) + assert result == "", testname + + +def _check_all_options_uncorrupted(node, relname, prefix): + """Every option combination is stable and reports no corruption. + + relname must be an uncorrupted relation. 
+ """ + for stop in ("true", "false"): + for check_toast in ("true", "false"): + for skip in ("'none'", "'all-frozen'", "'all-visible'"): + for startblock in ("NULL", "0"): + for endblock in ("NULL", "0"): + opts = ( + "on_error_stop := {}, " + "check_toast := {}, " + "skip := {}, " + "startblock := {}, " + "endblock := {}".format( + stop, check_toast, skip, startblock, endblock + ) + ) + _detects_no_corruption( + node, + "verify_heapam('{}', {})".format(relname, opts), + "{}: {}".format(prefix, opts), + ) + + +def _check_corrupt_table(node): + """Corrupt a fresh table and confirm detection under several options.""" + _fresh_test_table(node, "test") + _corrupt_first_page(node, "test") + _detects_heap_corruption(node, "verify_heapam('test')", "plain corrupted table") + _detects_heap_corruption( + node, + "verify_heapam('test', skip := 'all-visible')", + "plain corrupted table skipping all-visible", + ) + _detects_heap_corruption( + node, + "verify_heapam('test', skip := 'all-frozen')", + "plain corrupted table skipping all-frozen", + ) + _detects_heap_corruption( + node, + "verify_heapam('test', check_toast := false)", + "plain corrupted table skipping toast", + ) + _detects_heap_corruption( + node, + "verify_heapam('test', startblock := 0, endblock := 0)", + "plain corrupted table checking only block zero", + ) + + +def _check_all_frozen_table(node): + """A frozen table is clean; corruption is detected unless skipped.""" + _fresh_test_table(node, "test") + node.safe_psql("VACUUM (FREEZE, DISABLE_PAGE_SKIPPING) test") + _detects_no_corruption( + node, "verify_heapam('test')", "all-frozen not corrupted table" + ) + _corrupt_first_page(node, "test") + _detects_heap_corruption( + node, "verify_heapam('test')", "all-frozen corrupted table" + ) + _detects_no_corruption( + node, + "verify_heapam('test', skip := 'all-frozen')", + "all-frozen corrupted table skipping all-frozen", + ) + + +def _check_sequence(node): + """A sequence (heap under the hood) passes checks across 
mutations.""" + _fresh_test_sequence(node, "test_seq") + _check_all_options_uncorrupted(node, "test_seq", "plain") + node.safe_psql("SELECT nextval('test_seq');") + _check_all_options_uncorrupted(node, "test_seq", "plain") + node.safe_psql("SELECT setval('test_seq', 102);") + _check_all_options_uncorrupted(node, "test_seq", "plain") + node.safe_psql("ALTER SEQUENCE test_seq RESTART WITH 51") + _check_all_options_uncorrupted(node, "test_seq", "plain") + + +def test_001_verify_heapam(create_pg): + """verify_heapam detects line-pointer corruption and passes clean rels.""" + node = create_pg("test", no_data_checksums=True, start=False) + node.append_conf("autovacuum=off") + node.start() + node.safe_psql("CREATE EXTENSION amcheck") + + # Uncorrupted table passes under every option combination. + _fresh_test_table(node, "test") + _check_all_options_uncorrupted(node, "test", "plain") + + _check_corrupt_table(node) + _check_all_frozen_table(node) + _check_sequence(node) diff --git a/contrib/amcheck/pyt/test_002_cic.py b/contrib/amcheck/pyt/test_002_cic.py new file mode 100644 index 0000000000000..6c7f6125d52af --- /dev/null +++ b/contrib/amcheck/pyt/test_002_cic.py @@ -0,0 +1,65 @@ +# Copyright (c) 2021-2026, PostgreSQL Global Development Group + +"""Port of contrib/amcheck/t/002_cic.pl. + +amcheck verification (bt_index_check / gin_index_check / bt_index_parent_check) +holds up under concurrent INSERTs and CREATE/DROP INDEX CONCURRENTLY: a pgbench +workload runs transactions and CIC in parallel, and a parent check after a row +removed by a still-in-progress transaction reports no corruption. 
+""" + +import pypg + + +def test_002_cic(create_pg): + """amcheck under concurrent INSERTs and CREATE INDEX CONCURRENTLY.""" + node = create_pg("CIC_test", start=False) + node.append_conf("lock_timeout = {}".format(1000 * pypg.test_timeout_default())) + node.start() + node.safe_psql("CREATE EXTENSION amcheck") + node.safe_psql("CREATE TABLE tbl(i int, j jsonb)") + node.safe_psql("CREATE INDEX idx ON tbl(i)") + node.safe_psql("CREATE INDEX ginidx ON tbl USING gin(j)") + node.pgbench( + "--no-vacuum --client=5 --transactions=100", + 0, + [r"actually processed"], + [r"^$"], + "concurrent INSERTs and CIC", + { + "002_pgbench_concurrent_transaction": ( + "BEGIN;\n" + "INSERT INTO tbl VALUES(0," + ' \'{"a":[["b",{"x":1}],["b",{"x":2}]],"c":3}\');\n' + "COMMIT;\n" + ), + "002_pgbench_concurrent_transaction_savepoints": ( + "BEGIN;\nSAVEPOINT s1;\nINSERT INTO tbl VALUES(0, '[[14,2,3]]');\n" + "COMMIT;\n" + ), + "002_pgbench_concurrent_cic": ( + "SELECT pg_try_advisory_lock(42)::integer AS gotlock \\gset\n" + "\\if :gotlock\n" + " DROP INDEX CONCURRENTLY idx;\n" + " CREATE INDEX CONCURRENTLY idx ON tbl(i);\n" + " DROP INDEX CONCURRENTLY ginidx;\n" + " CREATE INDEX CONCURRENTLY ginidx ON tbl USING gin(j);\n" + " SELECT bt_index_check('idx',true);\n" + " SELECT gin_index_check('ginidx');\n" + " SELECT pg_advisory_unlock(42);\n" + "\\endif\n" + ), + }, + ) + node.safe_psql("CREATE TABLE quebec(i int primary key)") + node.safe_psql("INSERT INTO quebec SELECT i FROM generate_series(1, 2) s(i);") + in_progress_h = node.background_psql("postgres") + in_progress_h.query("BEGIN; SELECT pg_current_xact_id();") + node.safe_psql("DELETE FROM quebec WHERE i = 1;") + node.safe_psql("CREATE INDEX CONCURRENTLY oscar ON quebec(i);") + result = node.psql_capture( + "SELECT bt_index_parent_check('oscar', heapallindexed => true)" + ) + assert result.rc == 0, "bt_index_parent_check for CIC after removed row" + in_progress_h.quit() + node.stop() diff --git 
a/contrib/amcheck/pyt/test_003_cic_2pc.py b/contrib/amcheck/pyt/test_003_cic_2pc.py new file mode 100644 index 0000000000000..b45d59af631b3 --- /dev/null +++ b/contrib/amcheck/pyt/test_003_cic_2pc.py @@ -0,0 +1,123 @@ +# Copyright (c) 2021-2026, PostgreSQL Global Development Group + +"""Port of contrib/amcheck/t/003_cic_2pc.pl. + +CREATE INDEX CONCURRENTLY (btree and gin) interleaved with prepared (two-phase) +transactions, then across a server restart, must produce structurally valid +indexes: bt_index_check/gin_index_check pass. Finally a concurrent pgbench +workload mixes 2PC inserts with CIC/REINDEX CONCURRENTLY under an advisory lock, +checking the indexes throughout. +""" + +import pypg + + +def _build_indexes_with_2pc(node): + main_h = node.background_psql("postgres") + main_h.query_safe("BEGIN;\nINSERT INTO tbl VALUES(0, '[[14,2,3]]');\n") + cic_h = node.background_psql("postgres") + cic_h.query_until( + r"start", + "\\echo start\nCREATE INDEX CONCURRENTLY idx ON tbl(i);\n" + "CREATE INDEX CONCURRENTLY ginidx ON tbl USING gin(j);\n", + ) + main_h.query_safe("PREPARE TRANSACTION 'a';\n") + main_h.query_safe("BEGIN;\nINSERT INTO tbl VALUES(0, '[[14,2,3]]');\n") + node.safe_psql("COMMIT PREPARED 'a';") + main_h.query_safe( + "PREPARE TRANSACTION 'b';\nBEGIN;\n" + "INSERT INTO tbl VALUES(0, '\"mary had a little lamb\"');\n" + ) + node.safe_psql("COMMIT PREPARED 'b';") + main_h.query_safe("PREPARE TRANSACTION 'c';\nCOMMIT PREPARED 'c';\n") + main_h.quit() + cic_h.quit() + + +def test_003_cic_2pc(create_pg): + """CIC interleaved with 2PC and across restart yields valid indexes.""" + node = create_pg("CIC_2PC_test", start=False) + node.append_conf("max_prepared_transactions = 10") + node.append_conf("lock_timeout = {}".format(1000 * pypg.test_timeout_default())) + node.start() + node.safe_psql("CREATE EXTENSION amcheck") + node.safe_psql("CREATE TABLE tbl(i int, j jsonb)") + _build_indexes_with_2pc(node) + assert ( + node.psql_capture("SELECT 
bt_index_check('idx',true)").rc == 0 + ), "bt_index_check after overlapping 2PC" + assert ( + node.psql_capture("SELECT gin_index_check('ginidx')").rc == 0 + ), "gin_index_check after overlapping 2PC" + node.safe_psql( + "BEGIN;\nINSERT INTO tbl VALUES(0, " + '\'{"a":[["b",{"x":1}],["b",{"x":2}]],"c":3}\');\n' + "PREPARE TRANSACTION 'spans_restart';\nBEGIN;\nCREATE TABLE unused ();\n" + "PREPARE TRANSACTION 'persists_forever';\n" + ) + node.restart() + reindex_h = node.background_psql("postgres") + reindex_h.query_until( + r"start", + "\\echo start\nDROP INDEX CONCURRENTLY idx;\n" + "CREATE INDEX CONCURRENTLY idx ON tbl(i);\n" + "DROP INDEX CONCURRENTLY ginidx;\n" + "CREATE INDEX CONCURRENTLY ginidx ON tbl USING gin(j);\n", + ) + node.safe_psql("COMMIT PREPARED 'spans_restart'") + reindex_h.quit() + assert ( + node.psql_capture("SELECT bt_index_check('idx',true)").rc == 0 + ), "bt_index_check after 2PC and restart" + assert ( + node.psql_capture("SELECT gin_index_check('ginidx')").rc == 0 + ), "gin_index_check after 2PC and restart" + node.safe_psql("REINDEX TABLE tbl;") + node.pgbench( + "--no-vacuum --client=5 --transactions=100", + 0, + [r"actually processed"], + [r"^$"], + "concurrent INSERTs w/ 2PC and CIC", + _PGBENCH_FILES, + ) + node.stop() + + +_PGBENCH_FILES = { + "003_pgbench_concurrent_2pc": ( + "BEGIN;\nINSERT INTO tbl VALUES(0,'null');\n" + "PREPARE TRANSACTION 'c:client_id';\nCOMMIT PREPARED 'c:client_id';\n" + ), + "003_pgbench_concurrent_2pc_savepoint": ( + "BEGIN;\nSAVEPOINT s1;\n" + 'INSERT INTO tbl VALUES(0,\'[false, "jnvaba", -76, 7, {"_": [1]}, 9]\');\n' + "PREPARE TRANSACTION 'c:client_id';\nCOMMIT PREPARED 'c:client_id';\n" + ), + "003_pgbench_concurrent_cic": ( + "SELECT pg_try_advisory_lock(42)::integer AS gotlock \\gset\n" + "\\if :gotlock\n\tDROP INDEX CONCURRENTLY idx;\n" + "\tCREATE INDEX CONCURRENTLY idx ON tbl(i);\n" + "\tSELECT bt_index_check('idx',true);\n" + "\tSELECT pg_advisory_unlock(42);\n\\endif\n" + ), + 
"004_pgbench_concurrent_ric": ( + "SELECT pg_try_advisory_lock(42)::integer AS gotlock \\gset\n" + "\\if :gotlock\n\tREINDEX INDEX CONCURRENTLY idx;\n" + "\tSELECT bt_index_check('idx',true);\n" + "\tSELECT pg_advisory_unlock(42);\n\\endif\n" + ), + "005_pgbench_concurrent_cic": ( + "SELECT pg_try_advisory_lock(42)::integer AS gotginlock \\gset\n" + "\\if :gotginlock\n\tDROP INDEX CONCURRENTLY ginidx;\n" + "\tCREATE INDEX CONCURRENTLY ginidx ON tbl USING gin(j);\n" + "\tSELECT gin_index_check('ginidx');\n" + "\tSELECT pg_advisory_unlock(42);\n\\endif\n" + ), + "006_pgbench_concurrent_ric": ( + "SELECT pg_try_advisory_lock(42)::integer AS gotginlock \\gset\n" + "\\if :gotginlock\n\tREINDEX INDEX CONCURRENTLY ginidx;\n" + "\tSELECT gin_index_check('ginidx');\n" + "\tSELECT pg_advisory_unlock(42);\n\\endif\n" + ), +} diff --git a/contrib/amcheck/pyt/test_004_verify_nbtree_unique.py b/contrib/amcheck/pyt/test_004_verify_nbtree_unique.py new file mode 100644 index 0000000000000..8a45c9991e796 --- /dev/null +++ b/contrib/amcheck/pyt/test_004_verify_nbtree_unique.py @@ -0,0 +1,72 @@ +# Copyright (c) 2022-2026, PostgreSQL Global Development Group + +# pylint: disable=line-too-long,too-many-statements +"""Port of contrib/amcheck/t/004_verify_nbtree_unique.pl. + +amcheck bt_index_check uniqueness verification: a corrupted comparator that +makes distinct values compare equal is detected as a unique-index violation +(with and without deduplication). +Generated from the Perl original via .agent/gen_golden.py. 
+""" + +import re + + +def test_004_verify_nbtree_unique(create_pg): + """bt_index_check reports unique-index violations caused by a corrupted comparator.""" + node = create_pg("test", start=False) + node.append_conf("autovacuum = off") + node.start() + node.safe_psql( + "CREATE EXTENSION amcheck;\n\n\tCREATE FUNCTION ok_cmp (int4, int4)\n\tRETURNS int LANGUAGE sql AS\n\t$$\n\t\tSELECT\n\t\t\tCASE WHEN $1 < $2 THEN -1\n\t\t\t\t WHEN $1 > $2 THEN 1\n\t\t\t\t ELSE 0\n\t\t\tEND;\n\t$$;\n\n\t---\n\t--- Check 1: uniqueness violation.\n\t---\n\tCREATE FUNCTION ok_cmp1 (int4, int4)\n\tRETURNS int LANGUAGE sql AS\n\t$$\n\t\tSELECT public.ok_cmp($1, $2);\n\t$$;\n\n\t---\n\t--- Make values 768 and 769 look equal.\n\t---\n\tCREATE FUNCTION bad_cmp1 (int4, int4)\n\tRETURNS int LANGUAGE sql AS\n\t$$\n\t\tSELECT\n\t\t\tCASE WHEN ($1 = 768 AND $2 = 769) OR\n\t\t\t\t\t ($1 = 769 AND $2 = 768) THEN 0\n\t\t\t\t ELSE public.ok_cmp($1, $2)\n\t\t\tEND;\n\t$$;\n\n\t---\n\t--- Check 2: uniqueness violation without deduplication.\n\t---\n\tCREATE FUNCTION ok_cmp2 (int4, int4)\n\tRETURNS int LANGUAGE sql AS\n\t$$\n\t\tSELECT public.ok_cmp($1, $2);\n\t$$;\n\n\tCREATE FUNCTION bad_cmp2 (int4, int4)\n\tRETURNS int LANGUAGE sql AS\n\t$$\n\t\tSELECT\n\t\t\tCASE WHEN $1 = $2 AND $1 = 400 THEN -1\n\t\t\tELSE public.ok_cmp($1, $2)\n\t\tEND;\n\t$$;\n\n\t---\n\t--- Check 3: uniqueness violation with deduplication.\n\t---\n\tCREATE FUNCTION ok_cmp3 (int4, int4)\n\tRETURNS int LANGUAGE sql AS\n\t$$\n\t\tSELECT public.ok_cmp($1, $2);\n\t$$;\n\n\tCREATE FUNCTION bad_cmp3 (int4, int4)\n\tRETURNS int LANGUAGE sql AS\n\t$$\n\t\tSELECT public.bad_cmp2($1, $2);\n\t$$;\n\n\t---\n\t--- Create data.\n\t---\n\tCREATE TABLE bttest_unique1 (i int4);\n\tINSERT INTO bttest_unique1\n\t\t(SELECT * FROM generate_series(1, 1024) gs);\n\n\tCREATE TABLE bttest_unique2 (i int4);\n\tINSERT INTO bttest_unique2(i)\n\t\t(SELECT * FROM generate_series(1, 400) gs);\n\tINSERT INTO bttest_unique2\n\t\t(SELECT * FROM 
generate_series(400, 1024) gs);\n\n\tCREATE TABLE bttest_unique3 (i int4);\n\tINSERT INTO bttest_unique3\n\t\tSELECT * FROM bttest_unique2;\n\n\tCREATE OPERATOR CLASS int4_custom_ops1 FOR TYPE int4 USING btree AS\n\t\tOPERATOR 1 < (int4, int4), OPERATOR 2 <= (int4, int4),\n\t\tOPERATOR 3 = (int4, int4), OPERATOR 4 >= (int4, int4),\n\t\tOPERATOR 5 > (int4, int4), FUNCTION 1 ok_cmp1(int4, int4);\n\tCREATE OPERATOR CLASS int4_custom_ops2 FOR TYPE int4 USING btree AS\n\t\tOPERATOR 1 < (int4, int4), OPERATOR 2 <= (int4, int4),\n\t\tOPERATOR 3 = (int4, int4), OPERATOR 4 >= (int4, int4),\n\t\tOPERATOR 5 > (int4, int4), FUNCTION 1 bad_cmp2(int4, int4);\n\tCREATE OPERATOR CLASS int4_custom_ops3 FOR TYPE int4 USING btree AS\n\t\tOPERATOR 1 < (int4, int4), OPERATOR 2 <= (int4, int4),\n\t\tOPERATOR 3 = (int4, int4), OPERATOR 4 >= (int4, int4),\n\t\tOPERATOR 5 > (int4, int4), FUNCTION 1 bad_cmp3(int4, int4);\n\n\tCREATE UNIQUE INDEX bttest_unique_idx1\n\t\t\t\t\t\tON bttest_unique1\n\t\t\t\t\t\tUSING btree (i int4_custom_ops1)\n\t\t\t\t\t\tWITH (deduplicate_items = off);\n\tCREATE UNIQUE INDEX bttest_unique_idx2\n\t\t\t\t\t\tON bttest_unique2\n\t\t\t\t\t\tUSING btree (i int4_custom_ops2)\n\t\t\t\t\t\tWITH (deduplicate_items = off);\n\tCREATE UNIQUE INDEX bttest_unique_idx3\n\t\t\t\t\t\tON bttest_unique3\n\t\t\t\t\t\tUSING btree (i int4_custom_ops3)\n\t\t\t\t\t\tWITH (deduplicate_items = on);" + ) + result = node.safe_psql("SELECT bt_index_check('bttest_unique_idx1', true, true);") + assert result == "", "run amcheck on non-broken bttest_unique_idx1" + node.safe_psql( + "UPDATE pg_catalog.pg_amproc SET\n\t\t amproc = 'bad_cmp1'::regproc\n\tWHERE amproc = 'ok_cmp1'::regproc;" + ) + result = node.psql_capture( + "SELECT bt_index_check('bttest_unique_idx1', true, true);" + ) + assert re.search( + r'''index uniqueness is violated for index "bttest_unique_idx1"''', + result.stderr, + ), 'detected uniqueness violation for index "bttest_unique_idx1"' + result = node.psql_capture( + 
"SELECT bt_index_check('bttest_unique_idx2', true, true);" + ) + assert re.search( + r'''item order invariant violated for index "bttest_unique_idx2"''', + result.stderr, + ), 'detected item order invariant violation for index "bttest_unique_idx2"' + node.safe_psql( + "UPDATE pg_catalog.pg_amproc SET\n\t\t amproc = 'ok_cmp2'::regproc\n\tWHERE amproc = 'bad_cmp2'::regproc;" + ) + result = node.psql_capture( + "SELECT bt_index_check('bttest_unique_idx2', true, true);" + ) + assert re.search( + r'''index uniqueness is violated for index "bttest_unique_idx2"''', + result.stderr, + ), 'detected uniqueness violation for index "bttest_unique_idx2"' + result = node.psql_capture( + "SELECT bt_index_check('bttest_unique_idx3', true, true);" + ) + assert re.search( + r'''item order invariant violated for index "bttest_unique_idx3"''', + result.stderr, + ), 'detected item order invariant violation for index "bttest_unique_idx3"' + node.safe_psql( + "DELETE FROM bttest_unique3 WHERE 380 <= i AND i <= 420;\n\tINSERT INTO bttest_unique3 (SELECT * FROM generate_series(380, 420));\n\tINSERT INTO bttest_unique3 VALUES (400);\n\tDELETE FROM bttest_unique3 WHERE 380 <= i AND i <= 420;\n\tINSERT INTO bttest_unique3 (SELECT * FROM generate_series(380, 420));\n\tINSERT INTO bttest_unique3 VALUES (400);\n\tDELETE FROM bttest_unique3 WHERE 380 <= i AND i <= 420;\n\tINSERT INTO bttest_unique3 (SELECT * FROM generate_series(380, 420));\n\tINSERT INTO bttest_unique3 VALUES (400);" + ) + node.safe_psql( + "UPDATE pg_catalog.pg_amproc SET\n\t\t amproc = 'ok_cmp3'::regproc\n\tWHERE amproc = 'bad_cmp3'::regproc;" + ) + result = node.psql_capture( + "SELECT bt_index_check('bttest_unique_idx3', true, true);" + ) + assert re.search( + r'''index uniqueness is violated for index "bttest_unique_idx3"''', + result.stderr, + ), 'detected uniqueness violation for index "bttest_unique_idx3"' + node.stop() diff --git a/contrib/amcheck/pyt/test_005_pitr.py b/contrib/amcheck/pyt/test_005_pitr.py new file mode 
100644 index 0000000000000..f1c55315d10cb --- /dev/null +++ b/contrib/amcheck/pyt/test_005_pitr.py @@ -0,0 +1,79 @@ +# Copyright (c) 2021-2026, PostgreSQL Global Development Group + +"""Port of contrib/amcheck/t/005_pitr.pl. + +Test integrity of intermediate states by PITR to those states. An origin node +generates WAL with an interrupted btree leaf-page deletion, then a replica +recovers to exactly the UNLINK_PAGE LSN and promotes; amcheck's +bt_index_parent_check must detect the interrupted page deletion and still pass. +""" + +_SETUP = """\ +BEGIN; +CREATE EXTENSION amcheck; +CREATE EXTENSION pg_walinspect; +CREATE TABLE not_leftmost (c text STORAGE PLAIN); +INSERT INTO not_leftmost + SELECT repeat(n::text, database_block_size / 4) + FROM generate_series(1,6) t(n), pg_control_init(); +ALTER TABLE not_leftmost ADD CONSTRAINT not_leftmost_pk PRIMARY KEY (c); +DELETE FROM not_leftmost WHERE c ~ '^[1-4]'; +SELECT pg_create_physical_replication_slot('for_walinspect', true, false); +COMMIT; +""" + + +def _vacuum_sql(before_vacuum_lsn): + """SQL that VACUUMs the leaf page and returns the UNLINK_PAGE LSN.""" + return ( + "SET synchronous_commit = off;\n" + "VACUUM (VERBOSE, INDEX_CLEANUP ON) not_leftmost;\n" + "CREATE TABLE XLogFlush ();\n" + "DROP TABLE XLogFlush;\n" + "SELECT max(start_lsn)\n" + " FROM pg_get_wal_records_info('{}', 'FFFFFFFF/FFFFFFFF')\n" + " WHERE resource_manager = 'Btree' " + "AND record_type = 'UNLINK_PAGE';".format(before_vacuum_lsn) + ) + + +def test_005_pitr(create_pg): + """PITR to an interrupted-page-deletion state; amcheck detects and passes.""" + origin = create_pg("origin", has_archiving=True, allows_streaming=True, start=False) + origin.append_conf("autovacuum = off") + origin.start() + origin.backup("my_backup") + + origin.safe_psql(_SETUP) + before_vacuum_lsn = origin.safe_psql("SELECT pg_current_wal_lsn()") + unlink_lsn = origin.safe_psql(_vacuum_sql(before_vacuum_lsn)) + origin.stop() + assert unlink_lsn, "did not find UNLINK_PAGE 
record" + + replica = create_pg( + "replica", + from_backup=(origin, "my_backup"), + has_restoring=True, + start=False, + ) + replica.append_conf("recovery_target_lsn = '{}'".format(unlink_lsn)) + replica.append_conf("recovery_target_inclusive = off") + replica.append_conf("recovery_target_action = promote") + replica.start() + assert replica.poll_query_until( + "SELECT pg_is_in_recovery() = 'f';" + ), "Timed out while waiting for PITR promotion" + + debug = "SET client_min_messages = 'debug1'" + result = replica.psql_capture( + "{}; SELECT bt_index_parent_check('not_leftmost_pk', true)".format(debug) + ) + assert result.rc == 0, "bt_index_parent_check passes" + assert ( + "interrupted page deletion detected" in result.stderr + ), "bt_index_parent_check: interrupted page deletion detected" + + result = replica.psql_capture( + "{}; SELECT bt_index_check('not_leftmost_pk', true)".format(debug) + ) + assert result.rc == 0, "bt_index_check passes" diff --git a/contrib/amcheck/pyt/test_006_verify_gin.py b/contrib/amcheck/pyt/test_006_verify_gin.py new file mode 100644 index 0000000000000..64e9187a08520 --- /dev/null +++ b/contrib/amcheck/pyt/test_006_verify_gin.py @@ -0,0 +1,254 @@ +# Copyright (c) 2021-2026, PostgreSQL Global Development Group + +"""Port of contrib/amcheck/t/006_verify_gin.pl. + +gin_index_check() detects deliberately corrupted GIN index pages: wrong entry +order on leaf and inner entry-tree pages, swapped column attribute numbers, +parent/child key inconsistencies after a split, and a posting-tree leaf whose +TIDs exceed the parent's high key. Each scenario corrupts a specific block on +disk (with the server stopped) and asserts the exact detector message. +""" + +import os +import re +import struct + +# To force splits fast we want large-but-not-toasted tuples. 
+_FILLER_SIZE = 1900 + +_RANDOM_STRING_FN = ( + "CREATE OR REPLACE FUNCTION random_string( INT ) RETURNS text AS $$\n" + "SELECT string_agg(substring(" + "'0123456789abcdefghijklmnopqrstuvwxyz', " + "ceil(random() * 36)::integer, 1), '') " + "from generate_series(1, $1);\n" + "$$ LANGUAGE SQL;" +) + + +def _relation_filepath(node, relname): + """Return the absolute on-disk path of a relation's main fork.""" + rel = node.safe_psql("SELECT pg_relation_filepath('{}')".format(relname)) + assert rel, "path not found for relation {}".format(relname) + return os.path.join(node.datadir, rel) + + +def _string_replace_block(filename, find, replace, blkno, blksize): + """Replace find with replace within block blkno of filename (raw bytes). + + find may be a bytes literal (substituted everywhere it occurs) or a + compiled bytes regex (re.sub with backreferences in the bytes replace). + """ + offset = blkno * blksize + with open(filename, "r+b") as fh: + fh.seek(offset) + buffer = fh.read(blksize) + if isinstance(find, (bytes, bytearray)): + buffer = buffer.replace(bytes(find), bytes(replace)) + else: + buffer = find.sub(replace, buffer) + assert len(buffer) == blksize, "block size changed during replace" + fh.seek(offset) + fh.write(buffer) + + +def _gin_check_stderr(node, indexname): + """Run gin_index_check(indexname) and return psql's stderr.""" + result = node.psql_capture( + "SELECT gin_index_check('{}')".format(indexname), on_error_stop=False + ) + return result.stderr + + +def _wrong_order_expected(indexname): + """The 'wrong tuple order on entry tree page' message for indexname.""" + return ( + 'index "{}" has wrong tuple order on entry tree page, ' + "block 1, offset 2, rightlink 4294967295".format(indexname) + ) + + +def _insert_filler_rows(node, relname, prefixes): + """Insert one row per prefix with a filler-padded text[] value.""" + for prefix in prefixes: + node.safe_psql( + "INSERT INTO {} (a) VALUES " + "(('{{' || '{}' || random_string({}) 
||'}}')::text[]);".format( + relname, prefix, _FILLER_SIZE + ) + ) + + +def _invalid_entry_order_leaf_page(node, blksize): + """Wrong entry order on a leaf page: replace aaaaa with ccccc in root.""" + relname, indexname = "test", "test_gin_idx" + node.safe_psql( + "DROP TABLE IF EXISTS {rel};\n" + "CREATE TABLE {rel} (a text[]);\n" + "INSERT INTO {rel} (a) VALUES ('{{aaaaa,bbbbb}}');\n" + "CREATE INDEX {idx} ON {rel} USING gin (a);".format(rel=relname, idx=indexname) + ) + relpath = _relation_filepath(node, indexname) + node.stop() + _string_replace_block(relpath, b"aaaaa", b"ccccc", 1, blksize) + node.start() + assert re.search( + _wrong_order_expected(indexname), _gin_check_stderr(node, indexname) + ) + + +def _invalid_entry_order_inner_page(node, blksize): + """Wrong entry order on an inner page (needs two splits).""" + relname, indexname = "test", "test_gin_idx" + node.safe_psql( + "DROP TABLE IF EXISTS {rel};\n" + "CREATE TABLE {rel} (a text[]);".format(rel=relname) + ) + _insert_filler_rows( + node, + relname, + [ + "pppppppppp", + "qqqqqqqqqq", + "rrrrrrrrrr", + "ssssssssss", + "tttttttttt", + "uuuuuuuuuu", + "vvvvvvvvvv", + "wwwwwwwwww", + ], + ) + node.safe_psql( + "CREATE INDEX {idx} ON {rel} USING gin (a);".format(rel=relname, idx=indexname) + ) + relpath = _relation_filepath(node, indexname) + node.stop() + # rrrrrrrrrr and tttttttttt are keys in the root; break order on the first. 
+ _string_replace_block(relpath, b"rrrrrrrrrr", b"zzzzzzzzzz", 1, blksize) + node.start() + assert re.search( + _wrong_order_expected(indexname), _gin_check_stderr(node, indexname) + ) + + +def _invalid_entry_columns_order(node, blksize): + """Swapped attribute numbers in the root produce wrong column order.""" + relname, indexname = "test", "test_gin_idx" + node.safe_psql( + "DROP TABLE IF EXISTS {rel};\n" + "CREATE TABLE {rel} (a text[],b text[]);\n" + "INSERT INTO {rel} (a,b) VALUES ('{{aaa}}','{{bbb}}');\n" + "CREATE INDEX {idx} ON {rel} USING gin (a,b);".format( + rel=relname, idx=indexname + ) + ) + relpath = _relation_filepath(node, indexname) + node.stop() + # root items order before: (1,aaa), (2,bbb); after: (2,aaa), (1,bbb) + attrno_1 = struct.pack("=h", 1) + attrno_2 = struct.pack("=h", 2) + _string_replace_block( + relpath, + re.compile(re.escape(attrno_1) + rb"(.)(aaa)", re.DOTALL), + attrno_2 + rb"\1\2", + 1, + blksize, + ) + _string_replace_block( + relpath, + re.compile(re.escape(attrno_2) + rb"(.)(bbb)", re.DOTALL), + attrno_1 + rb"\1\2", + 1, + blksize, + ) + node.start() + assert re.search( + _wrong_order_expected(indexname), _gin_check_stderr(node, indexname) + ) + + +def _split_table_lmnxy(node, relname, indexname): + """Create a GIN index whose entry tree splits (l/m/n/x/y prefixes).""" + node.safe_psql( + "DROP TABLE IF EXISTS {rel};\n" + "CREATE TABLE {rel} (a text[]);".format(rel=relname) + ) + _insert_filler_rows( + node, + relname, + ["llllllllll", "mmmmmmmmmm", "nnnnnnnnnn", "xxxxxxxxxx", "yyyyyyyyyy"], + ) + node.safe_psql( + "CREATE INDEX {idx} ON {rel} USING gin (a);".format(rel=relname, idx=indexname) + ) + + +def _inconsistent_parent_key_parent_corrupted(node, blksize): + """Parent key smaller than child keys: inconsistent records on page 3.""" + relname, indexname = "test", "test_gin_idx" + _split_table_lmnxy(node, relname, indexname) + relpath = _relation_filepath(node, indexname) + node.stop() + # nnnnnnnnnn is a parent key in 
the root; make it smaller than child keys. + _string_replace_block(relpath, b"nnnnnnnnnn", b"aaaaaaaaaa", 1, blksize) + node.start() + expected = 'index "{}" has inconsistent records on page 3 offset 3'.format( + indexname + ) + assert re.search(expected, _gin_check_stderr(node, indexname)) + + +def _inconsistent_parent_key_child_corrupted(node, blksize): + """Child key bigger than parent key: inconsistent records on page 3.""" + relname, indexname = "test", "test_gin_idx" + _split_table_lmnxy(node, relname, indexname) + relpath = _relation_filepath(node, indexname) + node.stop() + # nnnnnnnnnn is the parent key in the root; make the child key bigger. + _string_replace_block(relpath, b"nnnnnnnnnn", b"pppppppppp", 3, blksize) + node.start() + expected = 'index "{}" has inconsistent records on page 3 offset 3'.format( + indexname + ) + assert re.search(expected, _gin_check_stderr(node, indexname)) + + +def _inconsistent_parent_key_posting_tree(node, blksize): + """Posting-tree leaf TIDs exceed a corrupted parent high key.""" + relname, indexname = "test", "test_gin_idx" + node.safe_psql( + "DROP TABLE IF EXISTS {rel};\n" + "CREATE TABLE {rel} (a text[]);\n" + "INSERT INTO {rel} (a) select ('{{aaaaa}}') from generate_series(1,10000);\n" + "CREATE INDEX {idx} ON {rel} USING gin (a);".format(rel=relname, idx=indexname) + ) + relpath = _relation_filepath(node, indexname) + node.stop() + # Posting tree for 'aaaaa' has its root at block 2 and leaves 3 and 4. + # Replace block 4's high key with (1,1) so leaf TIDs exceed it. 
+ find = re.compile(re.escape(struct.pack("=HHH", 0, 4, 0)) + rb"....", re.DOTALL) + replace = struct.pack("=HHHHH", 0, 4, 0, 1, 1) + _string_replace_block(relpath, find, replace, 2, blksize) + node.start() + expected = ( + 'index "{}": tid exceeds parent\'s high key in postingTree ' + "leaf on block 4".format(indexname) + ) + assert re.search(expected, _gin_check_stderr(node, indexname)) + + +def test_006_verify_gin(create_pg): + """gin_index_check detects deliberately corrupted GIN index pages.""" + node = create_pg("test", no_data_checksums=True, start=False) + node.append_conf("autovacuum=off") + node.start() + blksize = int(node.safe_psql("SHOW block_size;")) + node.safe_psql("CREATE EXTENSION amcheck") + node.safe_psql(_RANDOM_STRING_FN) + + _invalid_entry_order_leaf_page(node, blksize) + _invalid_entry_order_inner_page(node, blksize) + _invalid_entry_columns_order(node, blksize) + _inconsistent_parent_key_parent_corrupted(node, blksize) + _inconsistent_parent_key_child_corrupted(node, blksize) + _inconsistent_parent_key_posting_tree(node, blksize) diff --git a/contrib/auto_explain/meson.build b/contrib/auto_explain/meson.build index d2b0650af1cbf..24b4e35eb9b2f 100644 --- a/contrib/auto_explain/meson.build +++ b/contrib/auto_explain/meson.build @@ -31,4 +31,9 @@ tests += { 't/001_auto_explain.pl', ], }, + 'pytest': { + 'tests': [ + 'pyt/test_001_auto_explain.py', + ], + }, } diff --git a/contrib/auto_explain/pyt/test_001_auto_explain.py b/contrib/auto_explain/pyt/test_001_auto_explain.py new file mode 100644 index 0000000000000..f86d779bbb8a8 --- /dev/null +++ b/contrib/auto_explain/pyt/test_001_auto_explain.py @@ -0,0 +1,196 @@ +# Copyright (c) 2021-2026, PostgreSQL Global Development Group + +# pylint: disable=line-too-long +"""Port of contrib/auto_explain/t/001_auto_explain.pl. 
+ +auto_explain logs plans for statements: query text, parameters (with truncation +and disabling), seq/index scan nodes, query identifier (compute_query_id), +JSON format, extension options, non-superuser SET-permission behavior, and +pg_get_loaded_modules() reporting. Each query is run with per-query GUCs via +PGOPTIONS and the freshly appended log slice is examined. +""" + +import re + +import pypg + + +def _query_log(node, sql, params=None, user=None): + extra_env = {} + if params: + extra_env["PGOPTIONS"] = " ".join( + "-c {}={}".format(k, v) for k, v in params.items() + ) + if user: + extra_env["PGUSER"] = user + offset = node.current_log_position() + node.safe_psql(sql, extra_env=extra_env or None) + return pypg.slurp_file(node.log, offset) + + +def _like(log, pattern, msg, flags=0): + assert re.search(pattern, log, flags), "{}: {!r} not found".format(msg, pattern) + + +def _unlike(log, pattern, msg): + assert not re.search(pattern, log), "{}: {!r} unexpectedly found".format( + msg, pattern + ) + + +def test_001_auto_explain(create_pg): + """auto_explain plan logging across formats, parameters, and permissions.""" + node = create_pg("main", auth_extra=["--create-role", "regress_user1"], start=False) + node.append_conf("session_preload_libraries = 'pg_overexplain,auto_explain'") + node.append_conf("auto_explain.log_min_duration = 0") + node.append_conf("auto_explain.log_analyze = on") + node.start() + log = _query_log(node, "SELECT * FROM pg_class;") + _like(log, r"Query Text: SELECT \* FROM pg_class;", "query text logged, text mode") + _unlike( + log, r"Query Parameters:", "no query parameters logged when none, text mode" + ) + _like(log, r"Seq Scan on pg_class", "sequential scan logged, text mode") + log = _query_log( + node, + "PREPARE get_proc(name) AS SELECT * FROM pg_proc WHERE proname = $1; " + "EXECUTE get_proc('int4pl');", + ) + _like( + log, + r"Query Text: PREPARE get_proc\(name\) AS SELECT \* FROM pg_proc WHERE proname = \$1;", + "prepared query 
text logged, text mode", + ) + _like( + log, r"Query Parameters: \$1 = 'int4pl'", "query parameters logged, text mode" + ) + _like( + log, + r"Index Scan using pg_proc_proname_args_nsp_index on pg_proc", + "index scan logged, text mode", + ) + log = _query_log( + node, + "PREPARE get_type(name) AS SELECT * FROM pg_type WHERE typname = $1; " + "EXECUTE get_type('float8');", + {"auto_explain.log_parameter_max_length": 3}, + ) + _like( + log, + r"Query Text: PREPARE get_type\(name\) AS SELECT \* FROM pg_type WHERE typname = \$1;", + "prepared query text logged, text mode", + ) + _like( + log, + r"Query Parameters: \$1 = 'flo\.\.\.'", + "query parameters truncated, text mode", + ) + log = _query_log( + node, + "PREPARE get_type(name) AS SELECT * FROM pg_type WHERE typname = $1; " + "EXECUTE get_type('float8');", + {"auto_explain.log_parameter_max_length": 0}, + ) + _like( + log, + r"Query Text: PREPARE get_type\(name\) AS SELECT \* FROM pg_type WHERE typname = \$1;", + "prepared query text logged, text mode", + ) + _unlike( + log, + r"Query Parameters:", + "query parameters not logged when disabled, text mode", + ) + log = _query_log( + node, + "SELECT * FROM pg_class;", + {"auto_explain.log_verbose": "on", "compute_query_id": "on"}, + ) + _like( + log, + r"Query Identifier:", + "query identifier logged with compute_query_id=on, text mode", + ) + log = _query_log( + node, + "SELECT * FROM pg_class;", + {"auto_explain.log_verbose": "on", "compute_query_id": "regress"}, + ) + _unlike( + log, + r"Query Identifier:", + "query identifier not logged with compute_query_id=regress, text mode", + ) + log = _query_log( + node, "SELECT * FROM pg_class;", {"auto_explain.log_format": "json"} + ) + _like( + log, r'"Query Text": "SELECT \* FROM pg_class;"', "query text logged, json mode" + ) + _unlike( + log, r'"Query Parameters":', "query parameters not logged when none, json mode" + ) + _like( + log, + r'"Node Type": "Seq Scan"[^}]*"Relation Name": "pg_class"', + "sequential scan 
logged, json mode", + re.DOTALL, + ) + log = _query_log( + node, + "PREPARE get_class(name) AS SELECT * FROM pg_class WHERE relname = $1; " + "EXECUTE get_class('pg_class');", + {"auto_explain.log_format": "json"}, + ) + _like( + log, + r'"Query Text": "PREPARE get_class\(name\) AS SELECT \* FROM pg_class WHERE relname = \$1;"', + "prepared query text logged, json mode", + ) + _like( + log, + r'"Node Type": "Index Scan"[^}]*"Index Name": "pg_class_relname_nsp_index"', + "index scan logged, json mode", + re.DOTALL, + ) + log = _query_log(node, "SELECT 1;", {"auto_explain.log_extension_options": "debug"}) + _like(log, r"Parallel Safe:", "extension option produces per-node output") + _like(log, r"Command Type: select", "extension option produces per-plan output") + node.safe_psql( + "CREATE USER regress_user1;\n" + "GRANT SET ON PARAMETER auto_explain.log_format TO regress_user1;" + ) + log = _query_log( + node, + "SELECT * FROM pg_database;", + {"auto_explain.log_format": "json"}, + user="regress_user1", + ) + _like( + log, + r'"Query Text": "SELECT \* FROM pg_database;"', + "query text logged, json mode selected by non-superuser", + ) + log = _query_log( + node, + "SELECT * FROM pg_database;", + {"auto_explain.log_level": "log"}, + user="regress_user1", + ) + _like( + log, + r'WARNING: ( 42501:)? 
permission denied to set parameter "auto_explain\.log_level"', + "permission failure logged", + ) + node.safe_psql( + "REVOKE SET ON PARAMETER auto_explain.log_format FROM regress_user1;\n" + "DROP USER regress_user1;" + ) + res = node.safe_psql( + "SELECT module_name,\n" + " version = current_setting('server_version') as version_ok,\n" + " regexp_replace(file_name, '\\..*', '') as file_name_stripped\n" + "FROM pg_get_loaded_modules()\n" + "WHERE module_name = 'auto_explain';" + ) + _like(res, r"^auto_explain\|t\|auto_explain$", "pg_get_loaded_modules() ok") diff --git a/contrib/basebackup_to_shell/meson.build b/contrib/basebackup_to_shell/meson.build index eb23a9fec81fc..ed3eaee391915 100644 --- a/contrib/basebackup_to_shell/meson.build +++ b/contrib/basebackup_to_shell/meson.build @@ -27,4 +27,11 @@ tests += { 'env': {'GZIP_PROGRAM': gzip.found() ? gzip.full_path() : '', 'TAR': tar.found() ? tar.full_path() : '' }, }, + 'pytest': { + 'tests': [ + 'pyt/test_001_basic.py', + ], + 'env': {'GZIP_PROGRAM': gzip.found() ? gzip.full_path() : '', + 'TAR': tar.found() ? tar.full_path() : '' }, + }, } diff --git a/contrib/basebackup_to_shell/pyt/test_001_basic.py b/contrib/basebackup_to_shell/pyt/test_001_basic.py new file mode 100644 index 0000000000000..8b630ff7f88f6 --- /dev/null +++ b/contrib/basebackup_to_shell/pyt/test_001_basic.py @@ -0,0 +1,127 @@ +# Copyright (c) 2022-2026, PostgreSQL Global Development Group + +"""Port of contrib/basebackup_to_shell/t/001_basic.pl. + +The basebackup_to_shell module streams a base backup through a configured shell +command. The command must be configured; a target detail is permitted only when +the command template includes %d (and required when it does); an optional +required_role gates access. Successful backups produce gzip'd files that +decompress, untar, and verify with pg_verifybackup. Skips without gzip. 
+""" + +import os +import subprocess + +import pytest + +_PG_BASEBACKUP_CMD = [ + "pg_basebackup", + "--no-sync", + "--checkpoint", + "fast", + "--username", + "backupuser", + "--wal-method", + "fetch", +] + + +def _verify_backup(node, gzip, prefix, backup_dir, test_name, tmp_path): + """Assert the gzip'd manifest/tar exist, then decompress/untar/verify.""" + assert os.path.isfile( + "{}/{}backup_manifest.gz".format(backup_dir, prefix) + ), "{}: backup_manifest.gz was created".format(test_name) + assert os.path.isfile( + "{}/{}base.tar.gz".format(backup_dir, prefix) + ), "{}: base.tar.gz was created".format(test_name) + tar = os.environ.get("TAR") + if not tar: + return + subprocess.run( + [gzip, "-d", "{}/{}backup_manifest.gz".format(backup_dir, prefix)], + check=True, + ) + subprocess.run( + [gzip, "-d", "{}/{}base.tar.gz".format(backup_dir, prefix)], check=True + ) + extract_path = tmp_path / "extract_{}".format(prefix or "nodetail") + extract_path.mkdir() + subprocess.run( + [ + tar, + "xf", + "{}/{}base.tar".format(backup_dir, prefix), + "-C", + str(extract_path), + ], + check=True, + ) + node.command_ok( + [ + "pg_verifybackup", + "--no-parse-wal", + "--manifest-path", + "{}/{}backup_manifest".format(backup_dir, prefix), + "--exit-on-error", + str(extract_path), + ], + "{}: backup verifies ok".format(test_name), + ) + + +def test_001_basic(create_pg, tmp_path): + """basebackup_to_shell config gating and gzip'd backup verification.""" + gzip = os.environ.get("GZIP_PROGRAM") + if not gzip: + pytest.skip("gzip not available") + node = create_pg( + "primary", + allows_streaming=True, + auth_extra=["--create-role", "backupuser"], + start=False, + ) + node.append_conf("shared_preload_libraries = 'basebackup_to_shell'") + node.start() + node.safe_psql("CREATE USER backupuser REPLICATION") + node.safe_psql("CREATE ROLE trustworthy") + node.command_fails_like( + _PG_BASEBACKUP_CMD + ["--target", "shell"], + r"shell command for backup is not configured", + "fails if 
basebackup_to_shell.command is not set", + ) + backup_path = tmp_path / "backup" + backup_path.mkdir() + shell_command = '"{}" --fast > "{}/%f.gz"'.format(gzip, backup_path) + node.append_conf("basebackup_to_shell.command='{}'".format(shell_command)) + node.reload() + node.command_ok( + _PG_BASEBACKUP_CMD + ["--target", "shell"], + "backup with no detail: pg_basebackup", + ) + _verify_backup(node, gzip, "", str(backup_path), "backup with no detail", tmp_path) + node.command_fails_like( + _PG_BASEBACKUP_CMD + ["--target", "shell:foo"], + r"a target detail is not permitted because the configured command " + r"does not include %d", + "fails if detail provided without %d", + ) + shell_command = '"{}" --fast > "{}/%d.%f.gz"'.format(gzip, backup_path) + node.append_conf("basebackup_to_shell.command='{}'".format(shell_command)) + node.append_conf("basebackup_to_shell.required_role='trustworthy'") + node.reload() + node.command_fails_like( + _PG_BASEBACKUP_CMD + ["--target", "shell"], + r"permission denied to use basebackup_to_shell", + "fails if required_role not granted", + ) + node.safe_psql("GRANT trustworthy TO backupuser") + node.command_fails_like( + _PG_BASEBACKUP_CMD + ["--target", "shell"], + r"a target detail is required because the configured command includes %d", + "fails if %d is present and detail not given", + ) + node.command_ok( + _PG_BASEBACKUP_CMD + ["--target", "shell:bar"], + "backup with detail: pg_basebackup", + ) + _verify_backup(node, gzip, "bar.", str(backup_path), "backup with detail", tmp_path) diff --git a/contrib/bloom/meson.build b/contrib/bloom/meson.build index fa4f4ea796ba3..60caa0baab2dc 100644 --- a/contrib/bloom/meson.build +++ b/contrib/bloom/meson.build @@ -42,4 +42,9 @@ tests += { 't/001_wal.pl', ], }, + 'pytest': { + 'tests': [ + 'pyt/test_001_wal.py', + ], + }, } diff --git a/contrib/bloom/pyt/test_001_wal.py b/contrib/bloom/pyt/test_001_wal.py new file mode 100644 index 0000000000000..2cc2c82feb06e --- /dev/null +++ 
b/contrib/bloom/pyt/test_001_wal.py @@ -0,0 +1,61 @@ +# Copyright (c) 2018-2026, PostgreSQL Global Development Group + +"""Port of contrib/bloom/t/001_wal.pl. + +Bloom-index WAL replay: a bloom index built and repeatedly modified +(delete/vacuum/insert cycles) on a streaming primary must replay identically on +a hot standby, so index scans return the same rows on both nodes at every step. +""" + +_QUERIES = """SET enable_seqscan=off; +SET enable_bitmapscan=on; +SET enable_indexscan=on; +SELECT * FROM tst WHERE i = 0; +SELECT * FROM tst WHERE i = 3; +SELECT * FROM tst WHERE t = 'b'; +SELECT * FROM tst WHERE t = 'f'; +SELECT * FROM tst WHERE i = 3 AND t = 'c'; +SELECT * FROM tst WHERE i = 7 AND t = 'e'; +""" + + +def _test_index_replay(primary, standby, test_name): + """Wait for catch-up, then assert index-scan results match on both nodes.""" + primary.wait_for_catchup(standby) + primary_result = primary.safe_psql(_QUERIES) + standby_result = standby.safe_psql(_QUERIES) + assert primary_result == standby_result, "{}: query result matches".format( + test_name + ) + + +def test_001_wal(create_pg): + """Bloom index changes replay identically on a hot standby.""" + node_primary = create_pg("primary", allows_streaming=True) + backup_name = "my_backup" + node_primary.backup(backup_name) + node_standby = create_pg( + "standby", from_backup=(node_primary, backup_name), has_streaming=True + ) + node_primary.safe_psql("CREATE EXTENSION bloom;") + node_primary.safe_psql("CREATE TABLE tst (i int4, t text);") + node_primary.safe_psql( + "INSERT INTO tst SELECT i%10, substr(encode(sha256(i::text::bytea), " + "'hex'), 1, 1) FROM generate_series(1,10000) i;" + ) + node_primary.safe_psql( + "CREATE INDEX bloomidx ON tst USING bloom (i, t) WITH (col1 = 3);" + ) + _test_index_replay(node_primary, node_standby, "initial") + for i in range(1, 11): + node_primary.safe_psql("DELETE FROM tst WHERE i = {};".format(i)) + _test_index_replay(node_primary, node_standby, "delete {}".format(i)) + 
node_primary.safe_psql("VACUUM tst;") + _test_index_replay(node_primary, node_standby, "vacuum {}".format(i)) + start = 100001 + (i - 1) * 10000 + end = 100000 + i * 10000 + node_primary.safe_psql( + "INSERT INTO tst SELECT i%10, substr(encode(sha256(i::text::bytea), " + "'hex'), 1, 1) FROM generate_series({},{}) i;".format(start, end) + ) + _test_index_replay(node_primary, node_standby, "insert {}".format(i)) diff --git a/contrib/dblink/meson.build b/contrib/dblink/meson.build index e2489f41229fa..3b8545b41c72a 100644 --- a/contrib/dblink/meson.build +++ b/contrib/dblink/meson.build @@ -41,4 +41,9 @@ tests += { 't/001_auth_scram.pl', ], }, + 'pytest': { + 'tests': [ + 'pyt/test_001_auth_scram.py', + ], + }, } diff --git a/contrib/dblink/pyt/test_001_auth_scram.py b/contrib/dblink/pyt/test_001_auth_scram.py new file mode 100644 index 0000000000000..cf8decd87f79d --- /dev/null +++ b/contrib/dblink/pyt/test_001_auth_scram.py @@ -0,0 +1,209 @@ +# Copyright (c) 2024-2026, PostgreSQL Global Development Group + +# pylint: disable=line-too-long +"""Port of contrib/dblink/t/001_auth_scram.pl. + +dblink SCRAM passthrough: a dblink_fdw server with use_scram_passthrough reuses +the client's SCRAM keys to authenticate to the remote (same or different +cluster) without a stored password; disabling passthrough, overriding +require_auth, or supplying scram_client_key/scram_server_key options all fail +as expected, and loopback trust/password configurations are rejected. 
+""" + +import os +import re + +_USER = "user01" +_DB0 = "db0" +_DB1 = "db1" +_DB2 = "db2" + + +def _u_connstr(node, db): + return node.connstr(db) + " user=" + _USER + + +def _setup_table(node, db, tbl): + node.safe_psql( + "CREATE TABLE {} AS SELECT g as a, g + 1 as b " + "FROM generate_series(1,10) g(g)".format(tbl), + dbname=db, + ) + node.safe_psql("GRANT USAGE ON SCHEMA public TO {}".format(_USER), dbname=db) + node.safe_psql("GRANT SELECT ON {} TO {}".format(tbl, _USER), dbname=db) + + +def _setup_fdw_server(node, db, fdw, fdw_node, dbname, require_auth=None): + extra = ", require_auth 'none'" if require_auth == "none" else "" + node.safe_psql( + "CREATE SERVER {} FOREIGN DATA WRAPPER dblink_fdw options (" + "host '{}', port '{}', dbname '{}', use_scram_passthrough 'true'{})".format( + fdw, fdw_node.host, fdw_node.port, dbname, extra + ), + dbname=db, + ) + node.safe_psql( + "GRANT USAGE ON FOREIGN SERVER {} TO {};".format(fdw, _USER), dbname=db + ) + node.safe_psql("GRANT ALL ON SCHEMA public TO {}".format(_USER), dbname=db) + + +def _setup_user_mapping(node, db, fdw): + node.safe_psql( + "CREATE USER MAPPING FOR {user} SERVER {fdw} OPTIONS " + "(user '{user}');".format(user=_USER, fdw=fdw), + dbname=db, + ) + + +def _test_fdw_auth(node, db, tbl, fdw, testname): + res = node.psql_capture( + "SELECT count(1) FROM dblink('{fdw}', 'SELECT * FROM {tbl}') AS " + "{tbl}(a int, b int)".format(fdw=fdw, tbl=tbl), + connstr=_u_connstr(node, db), + ) + assert res.stdout == "10", testname + + +def test_001_auth_scram(create_pg): # pylint: disable=too-many-statements + """dblink SCRAM passthrough succeeds and misconfigurations fail.""" + node1 = create_pg("node1") + node2 = create_pg("node2") + fdw_server, fdw_server2 = "db1_fdw", "db2_fdw" + fdw_server3 = "db1_fdw_override" + fdw_invalid_server, fdw_invalid_server2 = "db2_fdw_invalid", "db2_fdw_invalid2" + node1.safe_psql("CREATE USER {} WITH password 'pass'".format(_USER)) + node2.safe_psql("CREATE USER {} WITH 
password 'pass'".format(_USER)) + os.environ["PGPASSWORD"] = "pass" + try: + node1.safe_psql("CREATE DATABASE {}".format(_DB0)) + node1.safe_psql("CREATE DATABASE {}".format(_DB1)) + node2.safe_psql("CREATE DATABASE {}".format(_DB2)) + _setup_table(node1, _DB1, "t") + _setup_table(node2, _DB2, "t2") + node1.safe_psql("CREATE EXTENSION IF NOT EXISTS dblink", dbname=_DB0) + _setup_fdw_server(node1, _DB0, fdw_server, node1, _DB1) + _setup_fdw_server(node1, _DB0, fdw_server2, node2, _DB2) + _setup_fdw_server( + node1, _DB0, fdw_invalid_server, node2, _DB2, require_auth="none" + ) + _setup_fdw_server(node1, _DB0, fdw_invalid_server2, node2, _DB2) + _setup_fdw_server(node1, _DB0, fdw_server3, node1, _DB1) + for fdw in (fdw_server, fdw_server2, fdw_invalid_server, fdw_server3): + _setup_user_mapping(node1, _DB0, fdw) + rolpassword = node1.safe_psql( + "SELECT rolpassword FROM pg_authid WHERE rolname = '{}';".format(_USER) + ) + node2.safe_psql("ALTER ROLE {} PASSWORD '{}'".format(_USER, rolpassword)) + os.unlink("{}/pg_hba.conf".format(node1.datadir)) + os.unlink("{}/pg_hba.conf".format(node2.datadir)) + node1.append_conf( + "\nlocal db0 all scram-sha-256\n" + "local db1 all scram-sha-256\n", + "pg_hba.conf", + ) + node2.append_conf( + "\nlocal db2 all scram-sha-256\n", + "pg_hba.conf", + ) + node1.restart() + node2.restart() + _test_scram_keys_not_overwritten(node1, _DB0, fdw_invalid_server2) + _test_fdw_auth( + node1, + _DB0, + "t", + fdw_server, + "SCRAM auth on the same database cluster must succeed", + ) + _test_fdw_auth( + node1, + _DB0, + "t2", + fdw_server2, + "SCRAM auth on a different database cluster must succeed", + ) + _test_invalid_overwritten_require_auth(node1, fdw_invalid_server) + _test_disabled_passthrough(node1, fdw_server3) + _test_loopback_rejections(node1, node2, fdw_server, fdw_server2) + finally: + os.environ.pop("PGPASSWORD", None) + + +def _test_scram_keys_not_overwritten(node, db, fdw): + for opt in ("scram_client_key", "scram_server_key"): + 
res = node.psql_capture( + "CREATE USER MAPPING FOR {user} SERVER {fdw} OPTIONS " + "(user '{user}', {opt} 'key');".format(user=_USER, fdw=fdw, opt=opt), + connstr=_u_connstr(node, db), + ) + assert res.rc == 3, "user mapping creation fails when using {}".format(opt) + assert re.search(r'ERROR: invalid option "{}"'.format(opt), res.stderr) + + +def _test_invalid_overwritten_require_auth(node1, fdw): + res = node1.psql_capture( + "select * from dblink('{}', 'select * from t') as t(a int, b int)".format(fdw), + connstr=_u_connstr(node1, _DB0), + ) + assert res.rc == 3, "loopback trust fails when overwriting require_auth" + assert re.search( + r"password or GSSAPI delegated credentials required", res.stderr + ), "expected error when connecting to a fdw overwriting the require_auth" + + +def _test_disabled_passthrough(node1, fdw): + connstr = _u_connstr(node1, _DB0) + node1.psql_capture( + "ALTER USER MAPPING FOR {} SERVER {} OPTIONS(add use_scram_passthrough " + "'false')".format(_USER, fdw), + connstr=connstr, + ) + res = node1.psql_capture( + "select * from dblink('{}', 'select * from t') as t(a int, b int)".format(fdw), + connstr=connstr, + ) + assert res.rc == 3, "SCRAM passthrough disabled on user mapping should fail" + assert re.search( + r"password", res.stderr, re.IGNORECASE + ), "expected password-related error when scram passthrough disabled" + + +def _test_loopback_rejections(node1, node2, fdw_server, fdw_server2): + os.unlink("{}/pg_hba.conf".format(node1.datadir)) + os.unlink("{}/pg_hba.conf".format(node2.datadir)) + node1.append_conf( + "\nlocal db0 all scram-sha-256\n" + "local db1 all trust\n", + "pg_hba.conf", + ) + node2.append_conf( + "\nlocal all all password\n", + "pg_hba.conf", + ) + node1.restart() + node2.restart() + res = node1.psql_capture( + "SELECT * FROM dblink('{}', 'SELECT * FROM t') AS t(a int, b int)".format( + fdw_server + ), + connstr=_u_connstr(node1, _DB0), + ) + assert res.rc == 3, "loopback trust fails on the same cluster" + 
assert re.search( + r'failed: authentication method requirement "scram-sha-256" failed: ' + r"server did not complete authentication", + res.stderr, + ), "expected error from loopback trust (same cluster)" + res = node1.psql_capture( + "SELECT * FROM dblink('{}', 'SELECT * FROM t2') AS t2(a int, b int)".format( + fdw_server2 + ), + connstr=_u_connstr(node1, _DB0), + ) + assert res.rc == 3, "loopback password fails on a different cluster" + assert re.search( + r'authentication method requirement "scram-sha-256" failed: ' + r"server requested a cleartext password", + res.stderr, + ), "expected error from loopback password (different cluster)" diff --git a/contrib/oid2name/meson.build b/contrib/oid2name/meson.build index 82b9ba48989e4..62ae2b6347a63 100644 --- a/contrib/oid2name/meson.build +++ b/contrib/oid2name/meson.build @@ -21,6 +21,11 @@ tests += { 'name': 'oid2name', 'sd': meson.current_source_dir(), 'bd': meson.current_build_dir(), + 'pytest': { + 'tests': [ + 'pyt/test_001_basic.py', + ], + }, 'tap': { 'tests': [ 't/001_basic.pl', diff --git a/contrib/oid2name/pyt/test_001_basic.py b/contrib/oid2name/pyt/test_001_basic.py new file mode 100644 index 0000000000000..af00039380083 --- /dev/null +++ b/contrib/oid2name/pyt/test_001_basic.py @@ -0,0 +1,15 @@ +# Copyright (c) 2022-2026, PostgreSQL Global Development Group + +# pylint: disable=line-too-long,too-many-statements +"""Port of contrib/oid2name/t/001_basic.pl. + +oid2name --help / --version / invalid-option handling. +Generated from the Perl original via .agent/gen_golden.py. 
+""" + + +def test_001_basic(pg_bin): + """oid2name --help / --version / invalid-option handling.""" + pg_bin.program_help_ok("oid2name") + pg_bin.program_version_ok("oid2name") + pg_bin.program_options_handling_ok("oid2name") diff --git a/contrib/pg_prewarm/meson.build b/contrib/pg_prewarm/meson.build index e70546a451b4f..1002c20dbcfc7 100644 --- a/contrib/pg_prewarm/meson.build +++ b/contrib/pg_prewarm/meson.build @@ -34,6 +34,11 @@ tests += { 'pg_prewarm', ], }, + 'pytest': { + 'tests': [ + 'pyt/test_001_basic.py', + ], + }, 'tap': { 'tests': [ 't/001_basic.pl', diff --git a/contrib/pg_prewarm/pyt/test_001_basic.py b/contrib/pg_prewarm/pyt/test_001_basic.py new file mode 100644 index 0000000000000..a409def91b239 --- /dev/null +++ b/contrib/pg_prewarm/pyt/test_001_basic.py @@ -0,0 +1,72 @@ +# Copyright (c) 2022-2026, PostgreSQL Global Development Group + +# pylint: disable=line-too-long,too-many-statements +"""Port of contrib/pg_prewarm/t/001_basic.pl. + +pg_prewarm smoke test: prewarming in buffer/read/prefetch modes succeeds (or reports prefetch unsupported), permission checks are enforced on tables/indexes for an unprivileged role, and the cluster shuts down cleanly. +Generated from the Perl original via .agent/gen_golden.py. 
+""" + +import re + + +def test_001_basic(pg_bin, create_pg): + """pg_prewarm buffer/read/prefetch modes and permission enforcement.""" + node = create_pg("main", start=False) + node.append_conf( + "shared_preload_libraries = 'pg_prewarm'\n pg_prewarm.autoprewarm = true\n pg_prewarm.autoprewarm_interval = 0" + ) + node.start() + node.safe_psql( + "CREATE EXTENSION pg_prewarm;\nCREATE TABLE test(c1 int);\nINSERT INTO test SELECT generate_series(1, 100);\nCREATE INDEX test_idx ON test(c1);\nCREATE ROLE test_user LOGIN;" + ) + result = node.safe_psql("SELECT pg_prewarm('test', 'read');") + assert re.search( + r"""^[1-9][0-9]*$""", + result, + ), "read mode succeeded" + result = node.safe_psql("SELECT pg_prewarm('test', 'buffer');") + assert re.search( + r"""^[1-9][0-9]*$""", + result, + ), "buffer mode succeeded" + # prefetch mode might or might not be available in this build: accept either a block count or the "not supported" error + result = node.psql_capture("SELECT pg_prewarm('test', 'prefetch');") + assert re.search(r"""^[1-9][0-9]*$""", result.stdout) or re.search( + r"""prefetch is not supported by this build""", result.stderr + ), "prefetch mode succeeded" + result = node.psql_capture( + "SELECT pg_prewarm('test');", extra_params=["--username", "test_user"] + ) + assert re.search( + r"""permission denied for table test""", result.stderr + ), "pg_prewarm failed as expected" + result = node.psql_capture( + "SELECT pg_prewarm('test_idx');", extra_params=["--username", "test_user"] + ) + assert re.search( + r"""permission denied for index test_idx""", result.stderr + ), "pg_prewarm failed as expected" + node.safe_psql("GRANT SELECT ON test TO test_user;") + # re-run as test_user so these checks actually exercise the privilege just granted + result = node.psql_capture( + "SELECT pg_prewarm('test');", extra_params=["--username", "test_user"] + ) + assert re.search( + r"""^[1-9][0-9]*$""", + result.stdout, + ), "pg_prewarm succeeded as expected" + result = node.psql_capture( + "SELECT pg_prewarm('test_idx');", extra_params=["--username", "test_user"] + ) + assert re.search( + r"""^[1-9][0-9]*$""", + result.stdout, + ), "pg_prewarm succeeded as expected" + result = node.safe_psql("SELECT autoprewarm_dump_now();") + assert re.search( + r"""^[1-9][0-9]*$""", + result, + ), "autoprewarm_dump_now succeeded" + node.restart() + node.wait_for_log( + r"""autoprewarm 
successfully prewarmed [1-9][0-9]* of [0-9]+ previously-loaded blocks""" + ) + node.stop() + pg_bin.command_like( + ["pg_controldata", str(node.datadir)], + r"""Database cluster state:\s*shut down""", + "cluster shut down normally", + ) diff --git a/contrib/pg_stash_advice/meson.build b/contrib/pg_stash_advice/meson.build index 96f485b772998..daaeceec2ca71 100644 --- a/contrib/pg_stash_advice/meson.build +++ b/contrib/pg_stash_advice/meson.build @@ -35,6 +35,11 @@ tests += { 'pg_stash_advice_utf8', ], }, + 'pytest': { + 'tests': [ + 'pyt/test_001_persist.py', + ], + }, 'tap': { 'tests': [ 't/001_persist.pl', diff --git a/contrib/pg_stash_advice/pyt/test_001_persist.py b/contrib/pg_stash_advice/pyt/test_001_persist.py new file mode 100644 index 0000000000000..8c1d670002895 --- /dev/null +++ b/contrib/pg_stash_advice/pyt/test_001_persist.py @@ -0,0 +1,57 @@ +# Copyright (c) 2022-2026, PostgreSQL Global Development Group + +# pylint: disable=line-too-long,too-many-statements +"""Port of contrib/pg_stash_advice/t/001_persist.pl. + +pg_stash_advice persistence: advice stashes/entries are dumped to pg_stash_advice.tsv and reloaded on restart (verified via the startup log), and the dump file is removed once all stashes are dropped. +Generated from the Perl original via .agent/gen_golden.py. 
+""" + + +def test_001_persist(create_pg): + """pg_stash_advice persistence across restart and cleanup on drop.""" + node = create_pg("main", start=False) + node.append_conf( + "shared_preload_libraries = 'pg_plan_advice, pg_stash_advice'\npg_stash_advice.persist = true\npg_stash_advice.persist_interval = 0" + ) + node.start() + node.safe_psql("CREATE EXTENSION pg_stash_advice;") + node.safe_psql( + "SELECT pg_create_advice_stash('stash_a');\n\tSELECT pg_set_stashed_advice('stash_a', 1001, 'IndexScan(t)');\n\tSELECT pg_set_stashed_advice('stash_a', 1002, E'line1\\nline2\\ttab\\\\backslash');\n\tSELECT pg_create_advice_stash('stash_b');\n\tSELECT pg_set_stashed_advice('stash_b', 2001, 'SeqScan(t)');" + ) + result = node.safe_psql( + "SELECT stash_name, num_entries FROM pg_get_advice_stashes() ORDER BY stash_name" + ) + assert result == "stash_a|2\nstash_b|1", "stashes present before restart" + node.restart() + node.wait_for_log(r"""loaded 2 advice stashes and 3 entries""") + result = node.safe_psql( + "SELECT stash_name, num_entries FROM pg_get_advice_stashes() ORDER BY stash_name" + ) + assert result == "stash_a|2\nstash_b|1", "stashes survived restart" + result = node.safe_psql( + "SELECT stash_name, query_id, advice_string FROM pg_get_advice_stash_contents(NULL) ORDER BY stash_name, query_id" + ) + assert ( + result + == "stash_a|1001|IndexScan(t)\nstash_a|1002|line1\nline2\ttab\\backslash\nstash_b|2001|SeqScan(t)" + ), "entry contents survived restart with special characters intact" + node.safe_psql("SELECT pg_create_advice_stash('stash_c');") + node.restart() + node.wait_for_log(r"""loaded 3 advice stashes and 3 entries""") + result = node.safe_psql( + "SELECT stash_name, num_entries FROM pg_get_advice_stashes() ORDER BY stash_name" + ) + assert ( + result == "stash_a|2\nstash_b|1\nstash_c|0" + ), "all three stashes survived second restart" + node.safe_psql( + "SELECT pg_drop_advice_stash('stash_a');\n\tSELECT pg_drop_advice_stash('stash_b');\n\tSELECT 
pg_drop_advice_stash('stash_c');" + ) + node.restart() + result = node.safe_psql("SELECT count(*) FROM pg_get_advice_stashes()") + assert result == "0", "no stashes after dropping all and restarting" + assert not ( + node.datadir / "pg_stash_advice.tsv" + ).exists(), "dump file removed after all stashes dropped" + node.stop() diff --git a/contrib/pg_stat_statements/meson.build b/contrib/pg_stat_statements/meson.build index 9d78cb88b7d78..af929f660c3b8 100644 --- a/contrib/pg_stat_statements/meson.build +++ b/contrib/pg_stat_statements/meson.build @@ -67,6 +67,11 @@ tests += { # runningcheck users do not have (e.g. buildfarm clients). 'runningcheck': false, }, + 'pytest': { + 'tests': [ + 'pyt/test_010_restart.py', + ], + }, 'tap': { 'tests': [ 't/010_restart.pl', diff --git a/contrib/pg_stat_statements/pyt/test_010_restart.py b/contrib/pg_stat_statements/pyt/test_010_restart.py new file mode 100644 index 0000000000000..250a622dd3b07 --- /dev/null +++ b/contrib/pg_stat_statements/pyt/test_010_restart.py @@ -0,0 +1,42 @@ +# Copyright (c) 2022-2026, PostgreSQL Global Development Group + +# pylint: disable=line-too-long,too-many-statements +"""Port of contrib/pg_stat_statements/t/010_restart.pl. + +pg_stat_statements persistence: collected statements survive a server restart, +and are discarded when pg_stat_statements.save is turned off. +Generated from the Perl original via .agent/gen_golden.py. 
+""" + + +def test_010_restart(create_pg): + """pg_stat_statements data survives a restart and is dropped when .save is off.""" + node = create_pg("main", start=False) + node.append_conf("shared_preload_libraries = 'pg_stat_statements'") + node.start() + node.safe_psql("CREATE EXTENSION pg_stat_statements") + node.safe_psql("CREATE TABLE t1 (a int)") + node.safe_psql("SELECT a FROM t1") + assert ( + node.safe_psql( + "SELECT query FROM pg_stat_statements WHERE query NOT LIKE '%pg_stat_statements%' ORDER BY query" + ) + == "CREATE TABLE t1 (a int)\nSELECT a FROM t1" + ), "pg_stat_statements populated" + node.restart() + assert ( + node.safe_psql( + "SELECT query FROM pg_stat_statements WHERE query NOT LIKE '%pg_stat_statements%' ORDER BY query" + ) + == "CREATE TABLE t1 (a int)\nSELECT a FROM t1" + ), "pg_stat_statements data kept across restart" + node.append_conf("pg_stat_statements.save = false") + node.reload() + node.restart() + assert ( + node.safe_psql( + "SELECT count(*) FROM pg_stat_statements WHERE query NOT LIKE '%pg_stat_statements%'" + ) + == "0" + ), "pg_stat_statements data not kept across restart with .save=false" + node.stop() diff --git a/contrib/pg_visibility/meson.build b/contrib/pg_visibility/meson.build index 8a17050f2ac52..abc4ccd2ed20b 100644 --- a/contrib/pg_visibility/meson.build +++ b/contrib/pg_visibility/meson.build @@ -33,6 +33,12 @@ tests += { 'pg_visibility', ], }, + 'pytest': { + 'tests': [ + 'pyt/test_001_concurrent_transaction.py', + 'pyt/test_002_corrupt_vm.py', + ], + }, 'tap': { 'tests': [ 't/001_concurrent_transaction.pl', diff --git a/contrib/pg_visibility/pyt/test_001_concurrent_transaction.py b/contrib/pg_visibility/pyt/test_001_concurrent_transaction.py new file mode 100644 index 0000000000000..cb2278fb109ba --- /dev/null +++ b/contrib/pg_visibility/pyt/test_001_concurrent_transaction.py @@ -0,0 +1,35 @@ +# Copyright (c) 2022-2026, PostgreSQL Global Development Group + +# pylint: disable=line-too-long,too-many-statements 
+"""Port of contrib/pg_visibility/t/001_concurrent_transaction.pl. + +pg_check_visible reports no errors for a vacuumed table on both primary and a streaming standby, even with a concurrent open transaction (held via a background psql session) affecting visibility-map computation. +Generated from the Perl original via .agent/gen_golden.py. +""" + + +def test_001_concurrent_transaction(create_pg): + """pg_check_visible clean on primary+standby with a concurrent transaction.""" + node = create_pg("main", allows_streaming=True, start=False) + node.start() + backup_name = "my_backup" + node.backup(backup_name) + standby = create_pg( + "standby", from_backup=(node, backup_name), has_streaming=True, start=False + ) + standby.start() + node.safe_psql("CREATE DATABASE other_database;") + bsession = node.background_psql("other_database") + bsession.query("BEGIN;\n\tSELECT txid_current();") + node.safe_psql( + "CREATE EXTENSION pg_visibility;\nCREATE TABLE vacuum_test AS SELECT 42 i;\nVACUUM (disable_page_skipping) vacuum_test;" + ) + result = node.safe_psql("SELECT * FROM pg_check_visible('vacuum_test');") + assert result == "", "pg_check_visible() detects no errors" + node.wait_for_catchup(standby) + result = standby.safe_psql("SELECT * FROM pg_check_visible('vacuum_test');") + assert result == "", "pg_check_visible() detects no errors" + bsession.query("COMMIT;") + bsession.quit() + node.stop() + standby.stop() diff --git a/contrib/pg_visibility/pyt/test_002_corrupt_vm.py b/contrib/pg_visibility/pyt/test_002_corrupt_vm.py new file mode 100644 index 0000000000000..e80953d25ecee --- /dev/null +++ b/contrib/pg_visibility/pyt/test_002_corrupt_vm.py @@ -0,0 +1,68 @@ +# Copyright (c) 2021-2026, PostgreSQL Global Development Group + +"""Port of contrib/pg_visibility/t/002_corrupt_vm.pl. 
+ +A stale visibility map exposes corruption: after freezing a table, the _vm fork +is saved, more rows are deleted (updating the real VM), then the old _vm is +restored so it disagrees with the heap. pg_check_visible and pg_check_frozen +must then report exactly the tuples whose visibility/frozen bits are now wrong. +""" + +import shutil + + +def test_002_corrupt_vm(create_pg): + """pg_check_visible/pg_check_frozen detect a restored, stale visibility map.""" + node = create_pg("main", start=False) + node.append_conf("autovacuum=off") + node.start() + blck_size = node.safe_psql("SHOW block_size;") + node.safe_psql( + f""" + CREATE EXTENSION pg_visibility; + CREATE TABLE corruption_test + WITH (autovacuum_enabled = false) AS + SELECT + i, + repeat('a', 10) AS data + FROM + generate_series(1, {blck_size}) i; + VACUUM (FREEZE, DISABLE_PAGE_SKIPPING) corruption_test; + """ + ) + npages = node.safe_psql( + "SELECT relpages FROM pg_class WHERE relname = 'corruption_test';" + ) + assert int(npages) >= 10, "table has at least 10 pages" + relfile = node.safe_psql("SELECT pg_relation_filepath('corruption_test');") + node.safe_psql("DELETE FROM corruption_test WHERE (ctid::text::point)[0] = 0;") + node.stop() + vm_file = f"{node.datadir}/{relfile}_vm" + shutil.copy(vm_file, f"{vm_file}_temp") + node.start() + tuples = node.safe_psql( + """SELECT ctid FROM ( + SELECT ctid FROM corruption_test + WHERE (ctid::text::point)[0] != 0 + ORDER BY random() LIMIT 5) + ORDER BY ctid ASC;""" + ) + # Perl: s/\n/,/g; s/\(/'(/g; s/\)/)'/g -- build a quoted ctid IN-list. 
+ tuples_query = tuples.replace("\n", ",").replace("(", "'(").replace(")", ")'") + node.safe_psql(f"DELETE FROM corruption_test WHERE ctid in ({tuples_query});") + node.stop() + shutil.move(f"{vm_file}_temp", vm_file) + node.start() + result = node.safe_psql( + """SELECT DISTINCT t_ctid + FROM pg_check_visible('corruption_test') + ORDER BY t_ctid ASC;""" + ) + assert result == tuples, "pg_check_visible must report tuples as corrupted" + result = node.safe_psql( + """SELECT DISTINCT t_ctid + FROM pg_check_frozen('corruption_test') + ORDER BY t_ctid ASC;""" + ) + assert result == tuples, "pg_check_frozen must report tuples as corrupted" + node.stop() diff --git a/contrib/postgres_fdw/meson.build b/contrib/postgres_fdw/meson.build index 3e2ed06b7665c..c150adea06e25 100644 --- a/contrib/postgres_fdw/meson.build +++ b/contrib/postgres_fdw/meson.build @@ -48,6 +48,12 @@ tests += { ], 'regress_args': ['--load-extension=postgres_fdw'], }, + 'pytest': { + 'tests': [ + 'pyt/test_001_auth_scram.py', + 'pyt/test_010_subscription.py', + ], + }, 'tap': { 'tests': [ 't/001_auth_scram.pl', diff --git a/contrib/postgres_fdw/pyt/test_001_auth_scram.py b/contrib/postgres_fdw/pyt/test_001_auth_scram.py new file mode 100644 index 0000000000000..2a3052b2b2200 --- /dev/null +++ b/contrib/postgres_fdw/pyt/test_001_auth_scram.py @@ -0,0 +1,185 @@ +# Copyright (c) 2024-2026, PostgreSQL Global Development Group + +"""Port of contrib/postgres_fdw/t/001_auth_scram.pl. + +postgres_fdw SCRAM credential passthrough: with use_scram_passthrough on the +foreign server, a SCRAM-authenticated user's credentials are forwarded so the +FDW connection authenticates without a stored password -- on the same cluster +and across clusters. Disabling passthrough on the user mapping makes the FDW +query fail with a password error, and loopback trust/password HBA entries are +rejected because the connection requires scram-sha-256. 
+""" + +import os +import re +import sys + +import pytest + +USER = "user01" +HOSTADDR = "127.0.0.1" + + +def _setup_table(node, db, tbl): + node.safe_psql( + "CREATE TABLE {} AS SELECT g, g + 1 FROM generate_series(1,10) g(g)".format( + tbl + ), + dbname=db, + ) + node.safe_psql("GRANT USAGE ON SCHEMA public TO {}".format(USER), dbname=db) + node.safe_psql("GRANT SELECT ON {} TO {}".format(tbl, USER), dbname=db) + + +def _setup_fdw_server(node, db, fdw, fdw_node, dbname): + node.safe_psql( + "CREATE SERVER {} FOREIGN DATA WRAPPER postgres_fdw options (host '{}', " + "port '{}', dbname '{}', use_scram_passthrough 'true')".format( + fdw, fdw_node.host, fdw_node.port, dbname + ), + dbname=db, + ) + + +def _setup_user_mapping(node, db, fdw): + node.safe_psql( + "CREATE USER MAPPING FOR {u} SERVER {f} OPTIONS (user '{u}');".format( + u=USER, f=fdw + ), + dbname=db, + ) + node.safe_psql( + "GRANT USAGE ON FOREIGN SERVER {} TO {}".format(fdw, USER), dbname=db + ) + node.safe_psql("GRANT ALL ON SCHEMA public TO {}".format(USER), dbname=db) + + +def _setup_pghba(node): + (node.datadir / "pg_hba.conf").unlink(missing_ok=True) + node.append_conf( + "local all all " + "scram-sha-256\n" + "host all all {}/32 " + "scram-sha-256\n".format(HOSTADDR), + filename="pg_hba.conf", + ) + node.restart() + + +def _test_auth(node, db, tbl, test_name): + connstr = node.connstr(db) + " user={}".format(USER) + assert ( + node.safe_psql( + "SELECT count(1) FROM {}".format(tbl), dbname=db, connstr=connstr + ) + == "10" + ), test_name + + +def _test_fdw_auth(node, db, tbl, fdw, test_name): + connstr = node.connstr(db) + " user={}".format(USER) + node.safe_psql( + "IMPORT FOREIGN SCHEMA public LIMIT TO ({}) FROM SERVER {} INTO " + "public;".format(tbl, fdw), + dbname=db, + connstr=connstr, + ) + _test_auth(node, db, tbl, test_name) + + +@pytest.mark.skipif(sys.platform == "win32", reason="needs Unix-domain sockets") +def test_001_auth_scram(create_pg): + """postgres_fdw forwards SCRAM 
credentials; passthrough-off and trust fail.""" + db0, db1, db2 = "db0", "db1", "db2" + fdw1, fdw2, fdw3 = "db1_fdw", "db2_fdw", "db1_fdw_override" + node1 = create_pg("node1") + node2 = create_pg("node2") + node1.safe_psql("CREATE USER {} WITH password 'pass'".format(USER)) + node2.safe_psql("CREATE USER {} WITH password 'pass'".format(USER)) + os.environ["PGPASSWORD"] = "pass" + node1.safe_psql("CREATE DATABASE {}".format(db0)) + node1.safe_psql("CREATE DATABASE {}".format(db1)) + node2.safe_psql("CREATE DATABASE {}".format(db2)) + _setup_table(node1, db1, "t") + _setup_table(node2, db2, "t2") + node1.safe_psql("CREATE EXTENSION IF NOT EXISTS postgres_fdw", dbname=db0) + _setup_fdw_server(node1, db0, fdw1, node1, db1) + _setup_fdw_server(node1, db0, fdw2, node2, db2) + _setup_fdw_server(node1, db0, fdw3, node1, db1) + for fdw in (fdw1, fdw2, fdw3): + _setup_user_mapping(node1, db0, fdw) + rolpassword = node1.safe_psql( + "SELECT rolpassword FROM pg_authid WHERE rolname = '{}';".format(USER) + ) + node2.safe_psql("ALTER ROLE {} PASSWORD '{}'".format(USER, rolpassword)) + _setup_pghba(node1) + _setup_pghba(node2) + _test_fdw_auth( + node1, db0, "t", fdw1, "SCRAM auth on the same database cluster must succeed" + ) + _test_fdw_auth( + node1, + db0, + "t2", + fdw2, + "SCRAM auth on a different database cluster must succeed", + ) + _test_auth( + node2, db2, "t2", "SCRAM auth directly on foreign server should still succeed" + ) + _passthrough_off(node1, db0, fdw3) + _loopback_rejections(node1, node2, db0) + + +def _passthrough_off(node1, db0, fdw3): + connstr = node1.connstr(db0) + " user={}".format(USER) + node1.safe_psql( + "ALTER USER MAPPING FOR {} SERVER {} OPTIONS(add use_scram_passthrough " + "'false')".format(USER, fdw3), + dbname=db0, + connstr=connstr, + ) + node1.safe_psql( + "CREATE FOREIGN TABLE override_t (g int, col2 int) SERVER {} OPTIONS " + "(table_name 't');".format(fdw3), + dbname=db0, + connstr=connstr, + ) + node1.safe_psql( + "GRANT SELECT ON 
override_t TO {};".format(USER), dbname=db0, connstr=connstr + ) + res = node1.psql_capture( + "SELECT count(1) FROM override_t", dbname=db0, connstr=connstr + ) + assert res.rc == 3, "SCRAM passthrough disabled on user mapping should fail" + assert re.search( + r"password", res.stderr, re.I + ), "expected password-related error when scram passthrough disabled" + + +def _loopback_rejections(node1, node2, db0): + (node1.datadir / "pg_hba.conf").unlink(missing_ok=True) + (node2.datadir / "pg_hba.conf").unlink(missing_ok=True) + node1.append_conf( + "local db0 all " + "scram-sha-256\n" + "local db1 all trust\n", + filename="pg_hba.conf", + ) + node2.append_conf( + "local all all password\n", + filename="pg_hba.conf", + ) + node1.restart() + node2.restart() + connstr = node1.connstr(db0) + " user={}".format(USER) + res = node1.psql_capture("select count(1) from t", dbname=db0, connstr=connstr) + assert res.rc == 3, "loopback trust fails on the same cluster" + assert re.search( + r'failed: authentication method requirement "scram-sha-256"', res.stderr + ), "expected error from loopback trust (same cluster)" + res = node1.psql_capture("select count(1) from t2", dbname=db0, connstr=connstr) + assert res.rc == 3, "loopback password fails on a different cluster" + assert re.search( + r'failed: authentication method requirement "scram-sha-256"', res.stderr + ), "expected error from loopback password (different cluster)" diff --git a/contrib/postgres_fdw/pyt/test_010_subscription.py b/contrib/postgres_fdw/pyt/test_010_subscription.py new file mode 100644 index 0000000000000..511b46195eee5 --- /dev/null +++ b/contrib/postgres_fdw/pyt/test_010_subscription.py @@ -0,0 +1,73 @@ +# Copyright (c) 2022-2026, PostgreSQL Global Development Group + +# pylint: disable=line-too-long,too-many-statements +"""Port of contrib/postgres_fdw/t/010_subscription.pl. 
+ +Logical replication through a subscription defined with a postgres_fdw foreign server (CREATE SUBSCRIPTION ... SERVER): the initial copy and later inserts reach the subscriber, and switching the subscription between CONNECTION and SERVER restarts the apply worker (verified via the subscriber log). +Generated from the Perl original via .agent/gen_golden.py. +""" + + +def test_010_subscription(create_pg): + """Subscription defined via a postgres_fdw foreign server replicates and restarts on change.""" + node_publisher = create_pg("publisher", allows_streaming="logical", start=False) + node_publisher.start() + node_subscriber = create_pg("subscriber", start=False) + node_subscriber.start() + node_publisher.safe_psql( + "CREATE TABLE tab_ins AS SELECT a, a + 1 as b FROM generate_series(1,1002) AS a" + ) + node_subscriber.safe_psql("CREATE EXTENSION postgres_fdw") + node_subscriber.safe_psql("CREATE TABLE tab_ins (a int, b int)") + publisher_connstr = node_publisher.connstr() + " dbname=postgres" + node_publisher.safe_psql("CREATE PUBLICATION tap_pub FOR TABLE tab_ins") + publisher_host = node_publisher.host + publisher_port = node_publisher.port + node_subscriber.safe_psql( + "CREATE SERVER tap_server FOREIGN DATA WRAPPER postgres_fdw OPTIONS (host '" + + str(publisher_host) + + "', port '" + + str(publisher_port) + + "', dbname 'postgres')" + ) + node_subscriber.safe_psql("CREATE USER MAPPING FOR PUBLIC SERVER tap_server") + node_subscriber.safe_psql( + "CREATE SUBSCRIPTION tap_sub SERVER tap_server PUBLICATION tap_pub WITH (password_required=false)" + ) + node_subscriber.wait_for_subscription_sync() + result = node_subscriber.safe_psql("SELECT MAX(a) FROM tab_ins") + assert result == "1002", "check that initial data was copied to subscriber" + node_publisher.safe_psql( + "INSERT INTO tab_ins SELECT a, a + 1 FROM generate_series(1003,1050) a" + ) + node_publisher.wait_for_catchup("tap_sub") + result = node_subscriber.safe_psql("SELECT MAX(a) FROM tab_ins") + assert result == "1050", "check that inserted data was copied to subscriber" + log_offset = node_subscriber.current_log_position() + 
node_subscriber.safe_psql( + "ALTER SUBSCRIPTION tap_sub CONNECTION '" + publisher_connstr + "'" + ) + node_subscriber.wait_for_log( + r"""logical replication worker for subscription "tap_sub" will restart because of a parameter change""", + log_offset, + ) + node_publisher.safe_psql( + "INSERT INTO tab_ins SELECT a, a + 1 FROM generate_series(1051,1057) a" + ) + node_publisher.wait_for_catchup("tap_sub") + result = node_subscriber.safe_psql("SELECT MAX(a) FROM tab_ins") + assert ( + result == "1057" + ), "check subscription after ALTER SUBSCRIPTION ... CONNECTION" + log_offset = node_subscriber.current_log_position() + node_subscriber.safe_psql("ALTER SUBSCRIPTION tap_sub SERVER tap_server") + node_subscriber.wait_for_log( + r"""logical replication worker for subscription "tap_sub" will restart because of a parameter change""", + log_offset, + ) + node_publisher.safe_psql( + "INSERT INTO tab_ins SELECT a, a + 1 FROM generate_series(1058,1073) a" + ) + node_publisher.wait_for_catchup("tap_sub") + result = node_subscriber.safe_psql("SELECT MAX(a) FROM tab_ins") + assert result == "1073", "check subscription after ALTER SUBSCRIPTION ... SERVER" diff --git a/contrib/sepgsql/meson.build b/contrib/sepgsql/meson.build index 70f9d76863038..29ef02f52db4b 100644 --- a/contrib/sepgsql/meson.build +++ b/contrib/sepgsql/meson.build @@ -49,4 +49,9 @@ tests += { 't/001_sepgsql.pl', ], }, + 'pytest': { + 'tests': [ + 'pyt/test_001_sepgsql.py', + ], + }, } diff --git a/contrib/sepgsql/pyt/test_001_sepgsql.py b/contrib/sepgsql/pyt/test_001_sepgsql.py new file mode 100644 index 0000000000000..f4ddcedf5852e --- /dev/null +++ b/contrib/sepgsql/pyt/test_001_sepgsql.py @@ -0,0 +1,128 @@ +# Copyright (c) 2021-2026, PostgreSQL Global Development Group + +"""Port of contrib/sepgsql/t/001_sepgsql.pl. 
+ +Runs the sepgsql label/dml/ddl/alter/misc(/truncate) regression suite, but only +when the platform is a properly configured SELinux host: PG_TEST_EXTRA must +include 'sepgsql', the SELinux tools (matchpathcon/runcon/sestatus) must be +present, SELinux must be enforcing in the unconfined_t domain, and the +sepgsql-regtest policy module with its booleans must be installed. On systems +that do not meet these conditions (e.g. non-SELinux Linux) the test skips, just +as the Perl original bails out. +""" + +import os +import subprocess +import sys + +import pytest + + +def _cmd_ok(argv): + try: + return ( + subprocess.run( + argv, + stdout=subprocess.DEVNULL, + stderr=subprocess.DEVNULL, + check=False, + ).returncode + == 0 + ) + except OSError: + return False + + +def _sestatus_field(label): + try: + out = subprocess.run( + ["sestatus"], + stdout=subprocess.PIPE, + encoding="utf-8", + env={**os.environ, "LANG": "C"}, + check=False, + ).stdout + except OSError: + return "" + for line in out.splitlines(): + if line.startswith(label): + return line.split(":", 1)[1].strip() + return "" + + +def _require_selinux(): + if "sepgsql" not in os.environ.get("PG_TEST_EXTRA", "").split(): + pytest.skip("Potentially unsafe test sepgsql not enabled in PG_TEST_EXTRA") + for tool in (["matchpathcon", "-n", "."], ["runcon", "--help"], ["sestatus"]): + if not _cmd_ok(tool): + pytest.skip("{} (SELinux tooling) not available".format(tool[0])) + try: + domain = subprocess.run( + ["id", "-Z"], stdout=subprocess.PIPE, encoding="utf-8", check=False + ).stdout.split(":") + except OSError: + pytest.skip("id -Z (SELinux) not available") + if len(domain) < 3 or domain[2] != "unconfined_t": + pytest.skip("tests must run from the unconfined_t SELinux domain") + if _sestatus_field("Current mode:") != "enforcing": + pytest.skip("SELinux must be enabled and in enforcing mode") + mnt = _sestatus_field("SELinuxfs mount:") + if not mnt or not os.path.exists( + os.path.join(mnt, "booleans", 
"sepgsql_regression_test_mode") + ): + pytest.skip("the sepgsql-regtest policy module is not installed") + for policy in ("sepgsql_regression_test_mode", "sepgsql_enable_users_ddl"): + out = subprocess.run( + ["getsebool", policy], + stdout=subprocess.PIPE, + encoding="utf-8", + check=False, + ).stdout.split() + if len(out) < 3 or out[2] != "on": + pytest.skip("SELinux boolean {} must be on".format(policy)) + + +@pytest.mark.skipif(sys.platform != "linux", reason="sepgsql is Linux/SELinux only") +def test_001_sepgsql(create_pg): + """Run the sepgsql regression suite on a configured SELinux host.""" + _require_selinux() + node = create_pg("test", start=False) + node.append_conf("log_statement=none") + sepgsql_sql = os.path.join(os.environ["share_contrib_dir"], "sepgsql.sql") + with open(sepgsql_sql, encoding="utf-8") as fh: + result = subprocess.run( + [ + os.path.join(str(node.bin_dir), "postgres"), + "--single", + "-F", + "-c", + "exit_on_error=true", + "-D", + str(node.datadir), + "template0", + ], + stdin=fh, + stdout=subprocess.DEVNULL, + stderr=subprocess.DEVNULL, + env=node.connenv, + check=False, + ) + assert result.returncode == 0, "sepgsql installation script" + node.append_conf("shared_preload_libraries=sepgsql") + node.start() + tests = ["label", "dml", "ddl", "alter", "misc"] + if os.path.isfile("/sys/fs/selinux/class/db_table/perms/truncate"): + tests.append("truncate") + node.command_ok( + [ + os.environ["PG_REGRESS"], + "--bindir", + "", + "--inputdir", + ".", + "--launcher", + "./launcher", + *tests, + ], + "sepgsql tests", + ) diff --git a/contrib/test_decoding/meson.build b/contrib/test_decoding/meson.build index ac655853d269c..c8b76b291f590 100644 --- a/contrib/test_decoding/meson.build +++ b/contrib/test_decoding/meson.build @@ -78,4 +78,9 @@ tests += { 't/001_repl_stats.pl', ], }, + 'pytest': { + 'tests': [ + 'pyt/test_001_repl_stats.py', + ], + }, } diff --git a/contrib/test_decoding/pyt/test_001_repl_stats.py 
b/contrib/test_decoding/pyt/test_001_repl_stats.py new file mode 100644 index 0000000000000..fbb44aef49647 --- /dev/null +++ b/contrib/test_decoding/pyt/test_001_repl_stats.py @@ -0,0 +1,117 @@ +# Copyright (c) 2022-2026, PostgreSQL Global Development Group + +"""Port of contrib/test_decoding/t/001_repl_stats.pl. + +Replication-slot statistics in pg_stat_replication_slots: stats persist across a +restart, survive a slot whose on-disk file was removed (when it no longer fits +under max_replication_slots), and a logical decoding worker that restarts does +not crash. The pgstat file must exist after a clean shutdown. +""" + +import os +import shutil + + +def _test_slot_stats(node, expected, msg): + """Assert the per-slot total_txns/total_bytes summary matches expected.""" + result = node.safe_psql( + """ + SELECT slot_name, total_txns > 0 AS total_txn, + total_bytes > 0 AS total_bytes + FROM pg_stat_replication_slots + ORDER BY slot_name""" + ) + assert result == expected, msg + + +def test_001_repl_stats(create_pg): + """Replication-slot statistics persist and recover correctly.""" + node = create_pg("test", allows_streaming="logical", start=False) + node.append_conf("synchronous_commit = on") + node.start() + node.safe_psql("CREATE TABLE test_repl_stat(col1 int)") + node.safe_psql( + """ + SELECT pg_create_logical_replication_slot('regression_slot1', 'test_decoding'); + SELECT pg_create_logical_replication_slot('regression_slot2', 'test_decoding'); + SELECT pg_create_logical_replication_slot('regression_slot3', 'test_decoding'); + SELECT pg_create_logical_replication_slot('regression_slot4', 'test_decoding'); +""" + ) + node.safe_psql("INSERT INTO test_repl_stat values(generate_series(1, 5));") + node.safe_psql( + """ + SELECT data FROM pg_logical_slot_get_changes('regression_slot1', NULL, + NULL, 'include-xids', '0', 'skip-empty-xacts', '1'); + SELECT data FROM pg_logical_slot_get_changes('regression_slot2', NULL, + NULL, 'include-xids', '0', 'skip-empty-xacts', 
'1'); + SELECT data FROM pg_logical_slot_get_changes('regression_slot3', NULL, + NULL, 'include-xids', '0', 'skip-empty-xacts', '1'); + SELECT data FROM pg_logical_slot_get_changes('regression_slot4', NULL, + NULL, 'include-xids', '0', 'skip-empty-xacts', '1'); +""" + ) + assert node.poll_query_until( + """ + SELECT count(slot_name) >= 4 FROM pg_stat_replication_slots + WHERE slot_name ~ 'regression_slot' + AND total_txns > 0 AND total_bytes > 0; +""" + ), "Timed out while waiting for statistics to be updated" + node.safe_psql("SELECT pg_drop_replication_slot('regression_slot4')") + node.stop() + node.start() + _test_slot_stats( + node, + "regression_slot1|t|t\nregression_slot2|t|t\nregression_slot3|t|t", + "check replication statistics are updated", + ) + node.stop() + datadir = node.datadir + slot3_replslotdir = "{}/pg_replslot/regression_slot3".format(datadir) + shutil.rmtree(slot3_replslotdir) + node.append_conf("max_replication_slots = 2") + node.start() + _test_slot_stats( + node, + "regression_slot1|t|t\nregression_slot2|t|t", + "check replication statistics after removing the slot file", + ) + node.safe_psql("DROP TABLE test_repl_stat") + node.safe_psql("SELECT pg_drop_replication_slot('regression_slot1')") + node.safe_psql("SELECT pg_drop_replication_slot('regression_slot2')") + node.stop() + node.start() + slot_name_restart = "regression_slot5" + node.safe_psql( + "SELECT pg_create_logical_replication_slot('{}', 'test_decoding');".format( + slot_name_restart + ) + ) + bpgsql = node.background_psql("postgres", on_error_stop=True) + bpgsql.query_safe( + "SELECT pg_logical_slot_peek_binary_changes('{}', NULL, NULL)".format( + slot_name_restart + ) + ) + node.safe_psql("SELECT pg_drop_replication_slot('{}')".format(slot_name_restart)) + node.safe_psql( + "SELECT pg_create_logical_replication_slot('{}', 'test_decoding');".format( + slot_name_restart + ) + ) + bpgsql.query_safe( + "SELECT pg_logical_slot_peek_binary_changes('{}', NULL, NULL)".format( + 
slot_name_restart + ) + ) + node.safe_psql("SELECT pg_drop_replication_slot('{}')".format(slot_name_restart)) + node.stop() + node.bin.command_like( + ["pg_controldata", node.datadir], + r"Database cluster state:\s+shut down\n", + "node shut down ok", + ) + stats_file = "{}/pg_stat/pgstat.stat".format(datadir) + assert os.path.isfile(stats_file), "stats file must exist after shutdown" + bpgsql.quit() diff --git a/contrib/vacuumlo/meson.build b/contrib/vacuumlo/meson.build index 4ee5b04857573..e034ad11a673b 100644 --- a/contrib/vacuumlo/meson.build +++ b/contrib/vacuumlo/meson.build @@ -21,6 +21,11 @@ tests += { 'name': 'vacuumlo', 'sd': meson.current_source_dir(), 'bd': meson.current_build_dir(), + 'pytest': { + 'tests': [ + 'pyt/test_001_basic.py', + ], + }, 'tap': { 'tests': [ 't/001_basic.pl', diff --git a/contrib/vacuumlo/pyt/test_001_basic.py b/contrib/vacuumlo/pyt/test_001_basic.py new file mode 100644 index 0000000000000..3496340d0987a --- /dev/null +++ b/contrib/vacuumlo/pyt/test_001_basic.py @@ -0,0 +1,15 @@ +# Copyright (c) 2022-2026, PostgreSQL Global Development Group + +# pylint: disable=line-too-long,too-many-statements +"""Port of contrib/vacuumlo/t/001_basic.pl. + +vacuumlo --help / --version / invalid-option handling. +Generated from the Perl original via .agent/gen_golden.py. +""" + + +def test_001_basic(pg_bin): + """vacuumlo --help / --version / invalid-option handling.""" + pg_bin.program_help_ok("vacuumlo") + pg_bin.program_version_ok("vacuumlo") + pg_bin.program_options_handling_ok("vacuumlo") From 0aadf54530199365731dcb8e004f7da81b5210ff Mon Sep 17 00:00:00 2001 From: Greg Burd Date: Sun, 14 Jun 2026 11:16:36 -0400 Subject: [PATCH 20/36] ci: run the pytest suite in the PostgreSQL CI matrix Enable the pytest suite on the Linux (incl. AddressSanitizer 64-bit and 32-bit), macOS, and Windows CI jobs, preloading the ASan runtime for the in-process libpq ctypes load on the sanitizer build. 
Co-authored-by: Andrew Dunstan Co-authored-by: Greg Burd --- .github/workflows/pg-ci.yml | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/.github/workflows/pg-ci.yml b/.github/workflows/pg-ci.yml index 5bc5292d2a55c..0b32bcbabff5b 100644 --- a/.github/workflows/pg-ci.yml +++ b/.github/workflows/pg-ci.yml @@ -668,6 +668,15 @@ jobs: - name: Test world shell: *su_postgres_shell + # The pytest suite loads libpq in-process via ctypes. Here libpq is + # AddressSanitizer-instrumented, and ASan must come first in the link + # order; dlopening it into an otherwise uninstrumented python aborts + # with "ASan runtime does not come first". Preload the ASan runtime for + # the test run to satisfy that (a no-op for the already-instrumented + # server/client binaries). Scoped to this step so the build is + # unaffected; detect_leaks is already disabled via ASAN_OPTIONS. + env: + ADDITIONAL_SETUP: export LD_PRELOAD="$(gcc -print-file-name=libasan.so)" run: *meson_test_world_cmd - *linux_collect_cores_step From 1e932ff91c5aad8c7a6b9c2cf27a29187b449d2c Mon Sep 17 00:00:00 2001 From: Greg Burd Date: Thu, 11 Jun 2026 09:02:36 -0400 Subject: [PATCH 21/36] pytest: introduce a typed exception taxonomy Replace the ad-hoc RuntimeError/CalledProcessError signalling with a small hierarchy: PgServerError for server-lifecycle failures (start/stop/restart/ backup/pg_recvlogical) and PgSqlError for failed SQL statements. PgSqlError is an alias for libpq.LibpqError (kept in the lower libpq layer to avoid a circular import) so a failure raises the same diagnostic-carrying type whether the statement ran in-process through libpq or was shelled out to psql. safe_psql now raises PgSqlError, recovering the primary/DETAIL/HINT/CONTEXT text from psql's stderr (SQLSTATE is unavailable over psql's text protocol, so it stays None). 
No test catches the old types, so this changes no test behavior; it gives tests a meaningful type to assert on and is the foundation for the rest of the Pythonic cleanup. --- src/test/pytest/libpq/errors.py | 33 ++++++++++---------- src/test/pytest/pypg/__init__.py | 5 +++ src/test/pytest/pypg/errors.py | 42 +++++++++++++++++++++++++ src/test/pytest/pypg/server.py | 53 +++++++++++++++++++++++++++----- 4 files changed, 109 insertions(+), 24 deletions(-) create mode 100644 src/test/pytest/pypg/errors.py diff --git a/src/test/pytest/libpq/errors.py b/src/test/pytest/libpq/errors.py index c665b663e221c..a43b5f4ab01bb 100644 --- a/src/test/pytest/libpq/errors.py +++ b/src/test/pytest/libpq/errors.py @@ -1,27 +1,26 @@ # Copyright (c) 2025, PostgreSQL Global Development Group -""" -Exception classes for libpq errors. +"""Exception classes for libpq errors. + +``LibpqError`` carries the PostgreSQL diagnostic fields (SQLSTATE, severity, +detail, hint, ...) when libpq reports them. It is the lowest layer of the +framework's SQL-error hierarchy; ``pypg.PgSqlError`` is an alias for it, so +catching either name works and the layering (libpq below pypg) is preserved +without a circular import. """ +from __future__ import annotations + from typing import Optional class LibpqError(RuntimeError): - """Exception for libpq errors with PostgreSQL diagnostic fields.""" - - sqlstate: Optional[str] - severity: Optional[str] - primary: Optional[str] - detail: Optional[str] - hint: Optional[str] - schema_name: Optional[str] - table_name: Optional[str] - column_name: Optional[str] - datatype_name: Optional[str] - constraint_name: Optional[str] - position: Optional[int] - context: Optional[str] + """A SQL/libpq operation failed, carrying PostgreSQL diagnostic fields. + + ``sqlstate`` and the convenience ``sqlstate_class`` (its first two + characters) are the stable, locale-independent way to assert on a specific + error condition. 
+ """ def __init__( self, @@ -56,7 +55,7 @@ def __init__( @property def sqlstate_class(self) -> Optional[str]: - """Returns the 2-character SQLSTATE class.""" + """The two-character SQLSTATE class, or None if no SQLSTATE is set.""" if self.sqlstate and len(self.sqlstate) >= 2: return self.sqlstate[:2] return None diff --git a/src/test/pytest/pypg/__init__.py b/src/test/pytest/pypg/__init__.py index fed8a1ea9f999..8842f04fac49f 100644 --- a/src/test/pytest/pypg/__init__.py +++ b/src/test/pytest/pypg/__init__.py @@ -6,6 +6,7 @@ test_timeout_default, ) from .command import CommandResult, PgBin +from .errors import PgError, PgServerError, PgSqlError, LibpqError from .fake import faker, meaningful_text, rand_str from .kerberos import KerberosServer from .server import PostgresServer @@ -33,6 +34,10 @@ "PostgresServer", "PgBin", "CommandResult", + "PgError", + "PgServerError", + "PgSqlError", + "LibpqError", "append_to_file", "check_mode_recursive", "chmod_recursive", diff --git a/src/test/pytest/pypg/errors.py b/src/test/pytest/pypg/errors.py new file mode 100644 index 0000000000000..9574204abc4f1 --- /dev/null +++ b/src/test/pytest/pypg/errors.py @@ -0,0 +1,42 @@ +# Copyright (c) 2025, PostgreSQL Global Development Group + +"""Exception taxonomy for the pypg test framework. + +The framework signals failure with exceptions rather than status returns, so a +test that does not explicitly tolerate a failure fails loudly at the point it +occurs. The hierarchy is: + + RuntimeError + PgServerError -- server lifecycle (start/stop/restart/backup) + PgSqlError (= LibpqError) -- a SQL statement failed on the server + +``PgSqlError`` is an alias for ``libpq.LibpqError`` (which lives in the lower +libpq layer to avoid a circular import); it carries the PostgreSQL diagnostic +fields (SQLSTATE, severity, detail, hint, ...) when they are available, whether +the statement ran in-process through libpq or was shelled out to psql. 
Tests +inspect ``sqlstate`` / ``sqlstate_class`` instead of regex-matching messages. +""" + +from __future__ import annotations + +from libpq.errors import LibpqError + +# A failed SQL statement, regardless of execution channel (libpq or psql). +PgSqlError = LibpqError + + +class PgError(RuntimeError): + """Base class for non-SQL errors raised by the pypg framework.""" + + +class PgServerError(PgError): + """A PostgreSQL server lifecycle operation failed. + + Raised by start/stop/restart/promote/backup and similar cluster operations. + A test that expects such a failure should assert on it with + ``pytest.raises(PgServerError)`` or tolerate it with + ``contextlib.suppress(PgServerError)`` rather than passing a status flag. + """ + + +__all__ = ["PgError", "PgServerError", "PgSqlError", "LibpqError"] diff --git a/src/test/pytest/pypg/server.py b/src/test/pytest/pypg/server.py index 72a391fd87924..bcf3b945c7bb7 100644 --- a/src/test/pytest/pypg/server.py +++ b/src/test/pytest/pypg/server.py @@ -18,6 +18,7 @@ from ._env import test_timeout_default from .command import CommandResult, PgBin from .bgpsql import BackgroundPsql +from .errors import PgServerError, PgSqlError from .interactive import InteractivePsql from .util import append_to_file, eprint, run, slurp_file from libpq import PGconn, connect as libpq_connect @@ -123,6 +124,41 @@ def set(self, **gucs): WINDOWS_OS = platform.system() == "Windows" +# psql, with the verbose-error settings the framework uses, prefixes the SQLSTATE +# line as e.g. "psql:...: ERROR: ..." plus optional DETAIL/HINT lines. +_PSQL_PRIMARY_RE = re.compile( + r"^(?:psql:[^:]*:\d+: )?(?:ERROR|FATAL|PANIC):\s+(.*)$", re.M +) +_PSQL_FIELD_RES = { + "detail": re.compile(r"^DETAIL:\s+(.*)$", re.M), + "hint": re.compile(r"^HINT:\s+(.*)$", re.M), + "context": re.compile(r"^CONTEXT:\s+(.*)$", re.M), +} + + +def _parse_psql_diagnostics(stderr): + """Extract the diagnostic fields psql prints into PgSqlError kwargs. 
+ + psql's text protocol does not surface the SQLSTATE, so sqlstate stays None; + the primary message and any DETAIL/HINT/CONTEXT lines are recovered so the + error object is still introspectable. + """ + fields = {} + match = _PSQL_PRIMARY_RE.search(stderr or "") + if match: + fields["primary"] = match.group(1).strip() + for name, regex in _PSQL_FIELD_RES.items(): + found = regex.search(stderr or "") + if found: + fields[name] = found.group(1).strip() + return fields + + +def _psql_error_message(query, stderr): + """Build a PgSqlError message from a failed psql invocation.""" + text = (stderr or "").strip() + return "SQL failed: {}\nquery was: {}".format(text, query) + class PostgresServer: """ @@ -470,7 +506,7 @@ def _restore_tar_backup(self, backup_path, tar_program, tablespace_map): for tstar in tstars: tsoid = re.sub(r"\.tar$", "", tstar) if tsoid not in tablespace_map: - raise RuntimeError("no tablespace mapping for {}".format(tstar)) + raise PgServerError("no tablespace mapping for {}".format(tstar)) newdir = tablespace_map[tsoid] os.mkdir(newdir) run(tar_program, "xf", backup_path / tstar, "-C", newdir) @@ -506,7 +542,7 @@ def start(self, fail_ok=False): return False # pg_ctl's own output rarely says why startup failed; include the # server log, which holds the actual startup error. 
- raise RuntimeError( + raise PgServerError( 'pg_ctl start failed for node "{}":\n--- {} ---\n{}'.format( self.name, self.log, self._log_text() ) @@ -701,7 +737,7 @@ def pg_recvlogical_upto( check=False, ) if proc.returncode != 0: - raise RuntimeError( + raise PgServerError( "pg_recvlogical exited with {}, stdout {!r} stderr {!r}".format( proc.returncode, proc.stdout, proc.stderr ) @@ -907,8 +943,9 @@ def safe_psql( timeout=timeout, ) if proc.returncode != 0: - raise subprocess.CalledProcessError( - proc.returncode, cmd, proc.stdout, proc.stderr + raise PgSqlError( + _psql_error_message(query, proc.stderr), + **_parse_psql_diagnostics(proc.stderr), ) return proc.stdout.rstrip("\n") @@ -1455,9 +1492,11 @@ def restart(self, mode="fast", fail_ok=False, log_like=None, log_unlike=None): offset = self.current_log_position() try: self.pg_ctl("restart", "--mode", mode) - except subprocess.CalledProcessError: + except subprocess.CalledProcessError as exc: if not fail_ok: - raise + raise PgServerError( + 'restart failed for node "{}"'.format(self.name) + ) from exc self._check_log_patterns("restart", offset, log_like, log_unlike) return False with open(os.path.join(self.datadir, "postmaster.pid"), encoding="utf-8") as f: From b865e766f59df9b9661f20dbc9a6589236869968 Mon Sep 17 00:00:00 2001 From: Greg Burd Date: Thu, 11 Jun 2026 09:07:17 -0400 Subject: [PATCH 22/36] pytest: give command results behavior (ProgramResult dataclass) Replace the bare CommandResult namedtuple with a ProgramResult dataclass that carries the assertions tests make on a program run: .ok/.failed properties, .assert_ok()/.assert_failed()/.assert_exit()/.assert_stdout_like()/ .assert_stderr_like(), and a __str__ that self-documents the exit code and output for failure messages. The PgBin command_* helpers now read against .ok/.failed/.exit_code. 
CommandResult stays as an alias and .rc as a property, and the dataclass is iterable as (exit_code, stdout, stderr), so the ~114 existing call sites keep working unchanged; later commits migrate them to the expressive API. --- src/test/pytest/pypg/__init__.py | 3 +- src/test/pytest/pypg/command.py | 119 +++++++++++++++++++++++++------ src/test/pytest/pypg/server.py | 6 +- 3 files changed, 104 insertions(+), 24 deletions(-) diff --git a/src/test/pytest/pypg/__init__.py b/src/test/pytest/pypg/__init__.py index 8842f04fac49f..09b634ffb031b 100644 --- a/src/test/pytest/pypg/__init__.py +++ b/src/test/pytest/pypg/__init__.py @@ -5,7 +5,7 @@ skip_unless_test_extras, test_timeout_default, ) -from .command import CommandResult, PgBin +from .command import CommandResult, PgBin, ProgramResult from .errors import PgError, PgServerError, PgSqlError, LibpqError from .fake import faker, meaningful_text, rand_str from .kerberos import KerberosServer @@ -34,6 +34,7 @@ "PostgresServer", "PgBin", "CommandResult", + "ProgramResult", "PgError", "PgServerError", "PgSqlError", diff --git a/src/test/pytest/pypg/command.py b/src/test/pytest/pypg/command.py index 6a5fb6cff175f..1cff64050c8aa 100644 --- a/src/test/pytest/pypg/command.py +++ b/src/test/pytest/pypg/command.py @@ -12,12 +12,89 @@ import os import re import subprocess -from collections import namedtuple +from dataclasses import dataclass from typing import Dict, List, Optional, Sequence from .util import run_captured -CommandResult = namedtuple("CommandResult", ["rc", "stdout", "stderr"]) + +@dataclass(frozen=True) +class ProgramResult: + """The outcome of running a PostgreSQL client program. + + Carries the exit code and captured output, and the assertions tests make on + them, so a check reads as one expressive call (``result.assert_ok()``) + rather than a hand-written ``assert result.exit_code == 0`` plus a + diagnostic string. Iterable as ``(exit_code, stdout, stderr)`` for the + legacy tuple-unpacking call sites. 
+ """ + + exit_code: int + stdout: str + stderr: str + + @property + def rc(self) -> int: + """Deprecated alias for :attr:`exit_code`.""" + return self.exit_code + + @property + def ok(self) -> bool: + """Whether the program exited with code 0.""" + return self.exit_code == 0 + + @property + def failed(self) -> bool: + """Whether the program exited with a nonzero code.""" + return self.exit_code != 0 + + def __iter__(self): + return iter((self.exit_code, self.stdout, self.stderr)) + + def __str__(self) -> str: + return ( + f"exit code: {self.exit_code}\n" + f"stdout:\n{self.stdout}\n" + f"stderr:\n{self.stderr}" + ) + + def assert_ok(self, msg: Optional[str] = None) -> "ProgramResult": + """Assert the program exited 0; return self for chaining.""" + assert self.ok, _prefix(msg, "expected success\n" + str(self)) + return self + + def assert_failed(self, msg: Optional[str] = None) -> "ProgramResult": + """Assert the program exited nonzero; return self for chaining.""" + assert self.failed, _prefix(msg, "expected failure\n" + str(self)) + return self + + def assert_exit(self, code: int, msg: Optional[str] = None) -> "ProgramResult": + """Assert the program exited with *code*; return self.""" + assert self.exit_code == code, _prefix(msg, f"expected exit {code}\n{self}") + return self + + def assert_stdout_like(self, pattern: str, msg=None) -> "ProgramResult": + """Assert stdout matches *pattern*; return self.""" + assert re.search(pattern, self.stdout), _prefix( + msg, f"stdout did not match {pattern!r}\n{self}" + ) + return self + + def assert_stderr_like(self, pattern: str, msg=None) -> "ProgramResult": + """Assert stderr matches *pattern*; return self.""" + assert re.search(pattern, self.stderr), _prefix( + msg, f"stderr did not match {pattern!r}\n{self}" + ) + return self + + +# Backward-compatible alias for the former namedtuple. 
+CommandResult = ProgramResult + + +def _prefix(msg: Optional[str], text: str) -> str: + return f"{msg}: {text}" if msg else text + # Programs are expected to keep --help output lines within this width. Matches # PostgreSQL::Test::Utils::program_help_ok. @@ -43,7 +120,7 @@ def _describe(cmd: Sequence, result: CommandResult) -> str: ) return ( f"command: {argv}\n" - f"exit code: {result.rc}\n" + f"exit code: {result.exit_code}\n" f"stdout:\n{result.stdout}\n" f"stderr:\n{result.stderr}" ) @@ -86,7 +163,7 @@ def result(self, cmd: Sequence, *, extra_env=None) -> CommandResult: Co-authored-by: Andrew Dunstan """ returncode, stdout, stderr = run_captured(_argv(cmd), env=self._env(extra_env)) - return CommandResult(returncode, stdout, stderr) + return ProgramResult(returncode, stdout, stderr) def popen(self, cmd: Sequence, *, extra_env=None) -> subprocess.Popen: """Start cmd as a long-lived background process (PATH set to bindir). @@ -123,34 +200,36 @@ def run_command(self, cmd: Sequence, *, extra_env=None) -> CommandResult: Perl helper, so equality checks against the captured strings match. 
""" result = self.result(cmd, extra_env=extra_env) - return CommandResult( - result.rc, + return ProgramResult( + result.exit_code, result.stdout.removesuffix("\n"), result.stderr.removesuffix("\n"), ) - def command_ok(self, cmd, msg=None, *, extra_env=None) -> CommandResult: + def command_ok(self, cmd, msg=None, *, extra_env=None) -> ProgramResult: """Assert the command exits with code 0.""" result = self.result(cmd, extra_env=extra_env) - assert result.rc == 0, _assert_msg(msg, "expected success", cmd, result) + assert result.ok, _assert_msg(msg, "expected success", cmd, result) return result - def command_fails(self, cmd, msg=None, *, extra_env=None) -> CommandResult: + def command_fails(self, cmd, msg=None, *, extra_env=None) -> ProgramResult: """Assert the command exits with a nonzero code.""" result = self.result(cmd, extra_env=extra_env) - assert result.rc != 0, _assert_msg(msg, "expected failure", cmd, result) + assert result.failed, _assert_msg(msg, "expected failure", cmd, result) return result - def command_exit_is(self, cmd, code, msg=None, *, extra_env=None) -> CommandResult: + def command_exit_is(self, cmd, code, msg=None, *, extra_env=None) -> ProgramResult: """Assert the command exits with the given code.""" result = self.result(cmd, extra_env=extra_env) - assert result.rc == code, _assert_msg(msg, f"expected exit {code}", cmd, result) + assert result.exit_code == code, _assert_msg( + msg, f"expected exit {code}", cmd, result + ) return result - def command_like(self, cmd, pattern, msg=None, *, extra_env=None) -> CommandResult: + def command_like(self, cmd, pattern, msg=None, *, extra_env=None) -> ProgramResult: """Assert success and that stdout matches pattern.""" result = self.result(cmd, extra_env=extra_env) - assert result.rc == 0, _assert_msg(msg, "expected success", cmd, result) + assert result.ok, _assert_msg(msg, "expected success", cmd, result) assert re.search(pattern, result.stdout), _assert_msg( msg, f"stdout did not match {pattern!r}", 
cmd, result ) @@ -159,7 +238,7 @@ def command_like(self, cmd, pattern, msg=None, *, extra_env=None) -> CommandResu def command_fails_like(self, cmd, pattern, msg=None, *, extra_env=None): """Assert failure and that stderr matches pattern.""" result = self.result(cmd, extra_env=extra_env) - assert result.rc != 0, _assert_msg(msg, "expected failure", cmd, result) + assert result.failed, _assert_msg(msg, "expected failure", cmd, result) assert re.search(pattern, result.stderr), _assert_msg( msg, f"stderr did not match {pattern!r}", cmd, result ) @@ -177,7 +256,7 @@ def command_ok_or_fails_like( be unsupported on the platform (e.g. pg_upgrade --clone). """ result = self.result(cmd, extra_env=extra_env) - if result.rc != 0: + if result.failed: assert re.search(expected_stdout, result.stdout), _assert_msg( msg, f"stdout did not match {expected_stdout!r}", cmd, result ) @@ -190,7 +269,7 @@ def command_ok_or_fails_like( def command_checks_all(self, cmd, exit_code, stdout_res, stderr_res, msg=None): """Assert the exit code and that every stdout/stderr regex matches.""" result = self.result(cmd) - assert result.rc == exit_code, _assert_msg( + assert result.exit_code == exit_code, _assert_msg( msg, f"expected exit {exit_code}", cmd, result ) for pattern in stdout_res: @@ -207,7 +286,7 @@ def program_help_ok(self, name): """--help exits 0, writes stdout, nothing to stderr, lines <= 95 chars.""" cmd = [name, "--help"] result = self.result(cmd) - assert result.rc == 0, _describe(cmd, result) + assert result.ok, _describe(cmd, result) assert result.stdout != "", f"{name} --help produced no stdout" assert result.stderr == "", f"{name} --help wrote to stderr:\n{result.stderr}" long_lines = [ @@ -222,7 +301,7 @@ def program_version_ok(self, name): """--version exits 0, writes stdout, nothing to stderr.""" cmd = [name, "--version"] result = self.result(cmd) - assert result.rc == 0, _describe(cmd, result) + assert result.ok, _describe(cmd, result) assert result.stdout != "", f"{name} 
--version produced no stdout" assert result.stderr == "", f"{name} --version wrote stderr:\n{result.stderr}" return result @@ -231,7 +310,7 @@ def program_options_handling_ok(self, name): """An invalid option gives a nonzero exit and an error message.""" cmd = [name, "--not-a-valid-option"] result = self.result(cmd) - assert result.rc != 0, f"{name} accepted an invalid option" + assert result.failed, f"{name} accepted an invalid option" assert result.stderr != "", f"{name} printed no error for an invalid option" return result diff --git a/src/test/pytest/pypg/server.py b/src/test/pytest/pypg/server.py index bcf3b945c7bb7..16191af1c38f8 100644 --- a/src/test/pytest/pypg/server.py +++ b/src/test/pytest/pypg/server.py @@ -16,7 +16,7 @@ from typing import Callable, Optional, Tuple from ._env import test_timeout_default -from .command import CommandResult, PgBin +from .command import PgBin, ProgramResult from .bgpsql import BackgroundPsql from .errors import PgServerError, PgSqlError from .interactive import InteractivePsql @@ -861,7 +861,7 @@ def psql_capture( timeout=None, ): """ - Run psql with query piped on stdin and return CommandResult(rc, stdout, + Run psql with query piped on stdin and return ProgramResult(rc, stdout, stderr) without raising. Mirrors PostgreSQL::Test::Cluster->psql in list context: --no-psqlrc --no-align --tuples-only --quiet, ON_ERROR_STOP by default (a SQL error then yields exit code 3), with an optional @@ -903,7 +903,7 @@ def psql_capture( # Match Cluster->psql, which chomps a single trailing newline off each. 
stdout = proc.stdout[:-1] if proc.stdout.endswith("\n") else proc.stdout stderr = proc.stderr[:-1] if proc.stderr.endswith("\n") else proc.stderr - return CommandResult(proc.returncode, stdout, stderr) + return ProgramResult(proc.returncode, stdout, stderr) def safe_psql( self, query, dbname="postgres", timeout=None, extra_env=None, connstr=None From 43ee5c909942ede4e2bd6f436aa0396e71220640 Mon Sep 17 00:00:00 2001 From: Greg Burd Date: Sun, 14 Jun 2026 09:32:08 -0400 Subject: [PATCH 23/36] pytest: add a typed SQL entry point (node.sql -> SqlResult) Introduce PostgresServer.sql(), the framework's primary way to run SQL. It returns an explicit SqlResult with named accessors -- .scalar(), .row(), .column(), .rows -- instead of a value whose shape (scalar / tuple / list) the caller has to infer, and raises PgSqlError on error. The default "psql" channel pipes the SQL to the psql client (each statement in its own implicit transaction, matching Perl safe_psql); channel="libpq" runs it in-process over a fresh libpq connection for protocol-level or single- transaction needs, raising the same PgSqlError. safe_psql is kept (now sharing the psql machinery via _psql_text) for the per-statement string contract its 3000+ call sites rely on; the per-area test migration replaces it with sql() later. Removes the old libpq-only node.sql() (its one caller ignored the return). Adds pyt/test_node_sql.py covering both channels and the accessors. 
--- pyproject.toml | 2 +- src/test/pytest/meson.build | 1 + src/test/pytest/pypg/__init__.py | 2 + src/test/pytest/pypg/server.py | 79 +++++++++++++++++--- src/test/pytest/pypg/sqlresult.py | 105 +++++++++++++++++++++++++++ src/test/pytest/pyt/test_node_sql.py | 73 +++++++++++++++++++ 6 files changed, 250 insertions(+), 12 deletions(-) create mode 100644 src/test/pytest/pypg/sqlresult.py create mode 100644 src/test/pytest/pyt/test_node_sql.py diff --git a/pyproject.toml b/pyproject.toml index fcd6b6a61633a..d18fa56588293 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -103,7 +103,7 @@ py-version = "3.9" max-line-length = 88 # PostgresServer is a large facade mirroring PostgreSQL::Test::Cluster (~3000 # lines of Perl); allow it room rather than splitting it artificially. -max-module-lines = 2000 +max-module-lines = 2200 [tool.pylint."messages control"] # Curated relaxations for pytest/ctypes idioms (not bug-hiding). The checks that diff --git a/src/test/pytest/meson.build b/src/test/pytest/meson.build index 17c3944bee6bd..f7fbe4a579416 100644 --- a/src/test/pytest/meson.build +++ b/src/test/pytest/meson.build @@ -14,6 +14,7 @@ tests += { 'pyt/test_fake.py', 'pyt/test_libpq.py', 'pyt/test_multi_server.py', + 'pyt/test_node_sql.py', 'pyt/test_query_helpers.py', ], }, diff --git a/src/test/pytest/pypg/__init__.py b/src/test/pytest/pypg/__init__.py index 09b634ffb031b..54d959f67f624 100644 --- a/src/test/pytest/pypg/__init__.py +++ b/src/test/pytest/pypg/__init__.py @@ -10,6 +10,7 @@ from .fake import faker, meaningful_text, rand_str from .kerberos import KerberosServer from .server import PostgresServer +from .sqlresult import SqlResult from .util import ( wait_for_file, compare_files, @@ -32,6 +33,7 @@ "rand_str", "KerberosServer", "PostgresServer", + "SqlResult", "PgBin", "CommandResult", "ProgramResult", diff --git a/src/test/pytest/pypg/server.py b/src/test/pytest/pypg/server.py index 16191af1c38f8..60fcc500fa121 100644 --- a/src/test/pytest/pypg/server.py +++ 
b/src/test/pytest/pypg/server.py @@ -13,15 +13,16 @@ import tempfile import time from collections import namedtuple -from typing import Callable, Optional, Tuple +from typing import Callable, Dict, Optional, Tuple from ._env import test_timeout_default from .command import PgBin, ProgramResult from .bgpsql import BackgroundPsql from .errors import PgServerError, PgSqlError +from .sqlresult import SqlResult from .interactive import InteractivePsql from .util import append_to_file, eprint, run, slurp_file -from libpq import PGconn, connect as libpq_connect +from libpq import PGconn, connect as libpq_connect, ExecStatus class FileBackup(contextlib.AbstractContextManager): @@ -136,14 +137,14 @@ def set(self, **gucs): } -def _parse_psql_diagnostics(stderr): +def _parse_psql_diagnostics(stderr) -> Dict[str, str]: """Extract the diagnostic fields psql prints into PgSqlError kwargs. psql's text protocol does not surface the SQLSTATE, so sqlstate stays None; the primary message and any DETAIL/HINT/CONTEXT lines are recovered so the error object is still introspectable. """ - fields = {} + fields: Dict[str, str] = {} match = _PSQL_PRIMARY_RE.search(stderr or "") if match: fields["primary"] = match.group(1).strip() @@ -845,11 +846,6 @@ def psql(self, *args): """Run psql with the given arguments.""" self._run(os.path.join(self._bindir, "psql"), "-w", *args) - def sql(self, query): - """Execute a SQL query via libpq. Returns simplified results.""" - with self.connect() as conn: - return conn.sql(query) - def psql_capture( self, query, @@ -917,6 +913,21 @@ def safe_psql( environment variables (e.g. PGOPTIONS, PGUSER). A connstr overrides the --dbname target (merged with PGHOST/PGPORT from the environment), used by the SSL tests to pick a specific cert/host combination. + + Prefer :meth:`sql`, which returns a typed :class:`SqlResult` rather than + a bare string; safe_psql is kept for the per-statement string contract. 
+ """ + stdout = self._psql_text( + query, dbname=dbname, timeout=timeout, extra_env=extra_env, connstr=connstr + ) + return stdout.rstrip("\n") + + def _psql_text( + self, query, *, dbname="postgres", timeout=None, extra_env=None, connstr=None + ): + """Run query through psql (tuples-only, unaligned, ON_ERROR_STOP) and + return its raw stdout, raising PgSqlError on a nonzero exit. Shared by + safe_psql and sql. """ if connstr is None: connstr = self.dbname_connstr(dbname) @@ -943,11 +954,57 @@ def safe_psql( timeout=timeout, ) if proc.returncode != 0: + diags = _parse_psql_diagnostics(proc.stderr) raise PgSqlError( _psql_error_message(query, proc.stderr), - **_parse_psql_diagnostics(proc.stderr), + primary=diags.get("primary"), + detail=diags.get("detail"), + hint=diags.get("hint"), + context=diags.get("context"), ) - return proc.stdout.rstrip("\n") + return proc.stdout + + def sql( + self, + query, + *, + dbname="postgres", + timeout=None, + extra_env=None, + connstr=None, + channel="psql", + ) -> SqlResult: + """Run query and return a typed :class:`SqlResult`, raising on error. + + This is the framework's primary SQL entry point. The default ``psql`` + channel pipes the SQL to the psql client, so each statement runs in its + own implicit transaction (CREATE DATABASE and other + non-transaction-block statements work) -- the same semantics as Perl's + safe_psql. Pass ``channel="libpq"`` to run the query in-process over a + fresh libpq connection (one connection, useful for protocol-level tests + or a single transaction); errors raise the same PgSqlError either way. + + Use the result's accessors to say what shape you expect: + ``.scalar()``, ``.row()``, ``.column()``, or ``.rows``. 
+ """ + if channel == "libpq": + rows: list = [] + with self.connect(dbname=dbname) as conn: + result = conn.exec(query) + status = result.status() + if status == ExecStatus.PGRES_TUPLES_OK: + rows = result.fetch_all() + elif status != ExecStatus.PGRES_COMMAND_OK: + result.raise_error() + return SqlResult([tuple(str(c) for c in row) for row in rows]) + if channel != "psql": + raise ValueError( + "channel must be 'psql' or 'libpq', got {!r}".format(channel) + ) + stdout = self._psql_text( + query, dbname=dbname, timeout=timeout, extra_env=extra_env, connstr=connstr + ) + return SqlResult.from_psql(stdout) def check_extension(self, extname): """Return True if extname is available (in pg_available_extensions). diff --git a/src/test/pytest/pypg/sqlresult.py b/src/test/pytest/pypg/sqlresult.py new file mode 100644 index 0000000000000..0fbcc367ca442 --- /dev/null +++ b/src/test/pytest/pypg/sqlresult.py @@ -0,0 +1,105 @@ +# Copyright (c) 2025, PostgreSQL Global Development Group + +"""The result of running a SQL query through :meth:`PostgresServer.sql`. + +A single explicit type with named accessors, rather than a value that is +sometimes a scalar, sometimes a tuple, and sometimes a list (the shape the +in-process libpq layer infers). Callers say what they expect: + + node.sql("SELECT 1").scalar() -> "1" + node.sql("SELECT a, b FROM t").row() -> ("a-val", "b-val") + node.sql("SELECT x FROM t").column() -> ["x1", "x2", ...] + node.sql("SELECT a, b FROM t").rows -> [("a", "b"), ...] + node.sql("INSERT ...").rows -> [] (no rows) + +Values are the text psql prints (unaligned, tuples-only). The result is truthy +when it has at least one row, and iterating it yields the rows. +""" + +from __future__ import annotations + +from typing import List, Optional, Tuple + +# psql's default unaligned field separator. +_FIELD_SEP = "|" + + +class SqlResult: + """Rows returned by a SQL statement, with explicit accessors. + + Constructed from psql's unaligned tuples-only stdout. 
A statement that + returns no rows (INSERT/DDL, or an empty SELECT) yields an empty result. + """ + + def __init__(self, rows: List[Tuple[str, ...]], raw: str = ""): + self._rows = rows + self.raw = raw + + @classmethod + def from_psql(cls, stdout: str) -> "SqlResult": + """Parse psql --no-align --tuples-only stdout into rows. + + Each non-final newline delimits a row; each row splits on the unaligned + field separator. A wholly empty output is zero rows. The trailing + newline psql emits is not a row. + """ + text = stdout + body = text[:-1] if text.endswith("\n") else text + if body == "": + return cls([], raw=stdout) + rows = [tuple(line.split(_FIELD_SEP)) for line in body.split("\n")] + return cls(rows, raw=stdout) + + @property + def rows(self) -> List[Tuple[str, ...]]: + """All rows as tuples of column-text values.""" + return self._rows + + def scalar(self) -> Optional[str]: + """The single value of a one-row, one-column result. + + Returns None for an empty result (no rows). Raises if the result has + more than one row or the row has more than one column, so a mis-shaped + query is caught rather than silently truncated. + """ + if not self._rows: + return None + if len(self._rows) != 1 or len(self._rows[0]) != 1: + raise ValueError( + "scalar() expects exactly one row and one column, got " + f"{len(self._rows)} row(s) of " + f"{len(self._rows[0]) if self._rows else 0} column(s)" + ) + return self._rows[0][0] + + def row(self) -> Optional[Tuple[str, ...]]: + """The single row of a one-row result (any number of columns). + + Returns None for an empty result; raises if there is more than one row. 
+ """ + if not self._rows: + return None + if len(self._rows) != 1: + raise ValueError(f"row() expects exactly one row, got {len(self._rows)}") + return self._rows[0] + + def column(self, index: int = 0) -> List[str]: + """The values of column *index* across every row.""" + return [r[index] for r in self._rows] + + def __bool__(self) -> bool: + return bool(self._rows) + + def __len__(self) -> int: + return len(self._rows) + + def __iter__(self): + return iter(self._rows) + + def __eq__(self, other) -> bool: + if isinstance(other, SqlResult): + return self._rows == other._rows + return NotImplemented + + def __repr__(self) -> str: + return f"SqlResult({self._rows!r})" diff --git a/src/test/pytest/pyt/test_node_sql.py b/src/test/pytest/pyt/test_node_sql.py new file mode 100644 index 0000000000000..7504ff29a79a7 --- /dev/null +++ b/src/test/pytest/pyt/test_node_sql.py @@ -0,0 +1,73 @@ +# Copyright (c) 2025, PostgreSQL Global Development Group + +"""Tests for the node-level sql() API and its typed SqlResult.""" + +import pytest + +import pypg + + +def test_sql_scalar(pg): + """A one-row, one-column result yields its scalar value as text.""" + assert pg.sql("SELECT 1").scalar() == "1" + assert pg.sql("SELECT 'hello'").scalar() == "hello" + + +def test_sql_empty_is_falsey(pg): + """A result with no rows is falsey and scalar() is None.""" + result = pg.sql("SELECT 1 WHERE false") + assert not result + assert len(result) == 0 + assert result.scalar() is None + + +def test_sql_row_and_columns(pg): + """row() returns the single row; column() projects one column.""" + assert pg.sql("SELECT 1, 'a'").row() == ("1", "a") + assert pg.sql("SELECT x FROM (VALUES (1), (2), (3)) v(x) ORDER BY x").column() == [ + "1", + "2", + "3", + ] + assert pg.sql("SELECT x FROM (VALUES (1), (2), (3)) v(x) ORDER BY x").rows == [ + ("1",), + ("2",), + ("3",), + ] + + +def test_sql_raises_pgsqlerror(pg): + """A failing statement raises PgSqlError, not a bare CalledProcessError.""" + with 
pytest.raises(pypg.PgSqlError): + pg.sql("SELECT * FROM no_such_table") + + +def test_sql_error_carries_primary(pg): + """The raised error exposes the primary message text from psql.""" + with pytest.raises(pypg.PgSqlError) as excinfo: + pg.sql("SELECT * FROM no_such_table") + assert "no_such_table" in str(excinfo.value) + + +def test_sql_libpq_channel(pg): + """The libpq channel returns the same shape as the psql channel.""" + assert pg.sql("SELECT 42", channel="libpq").scalar() == "42" + + +def test_sql_libpq_channel_raises(pg): + """The libpq channel also raises PgSqlError on a bad statement.""" + with pytest.raises(pypg.PgSqlError): + pg.sql("SELECT * FROM no_such_table", channel="libpq") + + +def test_sql_bad_channel(pg): + """An unknown channel is a ValueError.""" + with pytest.raises(ValueError): + pg.sql("SELECT 1", channel="bogus") + + +def test_scalar_rejects_multi_row(pg): + """scalar() refuses a result that is not exactly one cell.""" + result = pg.sql("SELECT * FROM (VALUES (1), (2)) v") + with pytest.raises(ValueError): + result.scalar() From 5ffa64cae85006dc651630b0825645cd259b088b Mon Sep 17 00:00:00 2001 From: Greg Burd Date: Sun, 14 Jun 2026 09:34:45 -0400 Subject: [PATCH 24/36] pytest: enums for server modes, keyword-only config args Add StopMode / CatchupMode / SlotCatchupMode (str-valued enums whose members equal the literals PostgreSQL expects) and accept either the enum or the bare string in stop()/restart()/wait_for_catchup()/wait_for_slot_catchup(), giving IDE completion and typo-safety while old string call sites keep working. Make adjust_conf's filename/skip_equals keyword-only so the (setting, value) order is unambiguous -- the positional Perl arg order was the exact shape that caused the test_005 "ssl" -> file "on" bug fixed earlier. 
--- src/test/pytest/pypg/__init__.py | 4 ++++ src/test/pytest/pypg/modes.py | 38 ++++++++++++++++++++++++++++++ src/test/pytest/pypg/server.py | 40 +++++++++++++++++++++----------- 3 files changed, 68 insertions(+), 14 deletions(-) create mode 100644 src/test/pytest/pypg/modes.py diff --git a/src/test/pytest/pypg/__init__.py b/src/test/pytest/pypg/__init__.py index 54d959f67f624..809e736ffc822 100644 --- a/src/test/pytest/pypg/__init__.py +++ b/src/test/pytest/pypg/__init__.py @@ -8,6 +8,7 @@ from .command import CommandResult, PgBin, ProgramResult from .errors import PgError, PgServerError, PgSqlError, LibpqError from .fake import faker, meaningful_text, rand_str +from .modes import CatchupMode, SlotCatchupMode, StopMode from .kerberos import KerberosServer from .server import PostgresServer from .sqlresult import SqlResult @@ -34,6 +35,9 @@ "KerberosServer", "PostgresServer", "SqlResult", + "StopMode", + "CatchupMode", + "SlotCatchupMode", "PgBin", "CommandResult", "ProgramResult", diff --git a/src/test/pytest/pypg/modes.py b/src/test/pytest/pypg/modes.py new file mode 100644 index 0000000000000..cb6cb71c3e240 --- /dev/null +++ b/src/test/pytest/pypg/modes.py @@ -0,0 +1,38 @@ +# Copyright (c) 2025, PostgreSQL Global Development Group + +"""Enumerations for the small set of mode strings the framework passes through. + +Each enum subclasses ``str`` so its members compare equal to, and serialize as, +the exact literal PostgreSQL expects (the value handed to ``pg_ctl --mode`` or +interpolated into a ``pg_stat_replication`` column name). Methods accept either +the enum or the bare string, so call sites can adopt the enums incrementally +while old string call sites keep working. 
+""" + +from __future__ import annotations + +from enum import Enum + + +class StopMode(str, Enum): + """How ``pg_ctl stop`` / ``restart`` shuts the server down.""" + + SMART = "smart" + FAST = "fast" + IMMEDIATE = "immediate" + + +class CatchupMode(str, Enum): + """Which ``pg_stat_replication`` LSN column ``wait_for_catchup`` waits on.""" + + SENT = "sent" + WRITE = "write" + FLUSH = "flush" + REPLAY = "replay" + + +class SlotCatchupMode(str, Enum): + """Which slot LSN ``wait_for_slot_catchup`` waits on.""" + + RESTART = "restart" + CONFIRMED_FLUSH = "confirmed_flush" diff --git a/src/test/pytest/pypg/server.py b/src/test/pytest/pypg/server.py index 60fcc500fa121..2524bf7e9778e 100644 --- a/src/test/pytest/pypg/server.py +++ b/src/test/pytest/pypg/server.py @@ -19,6 +19,7 @@ from .command import PgBin, ProgramResult from .bgpsql import BackgroundPsql from .errors import PgServerError, PgSqlError +from .modes import CatchupMode, SlotCatchupMode, StopMode from .sqlresult import SqlResult from .interactive import InteractivePsql from .util import append_to_file, eprint, run, slurp_file @@ -745,7 +746,9 @@ def pg_recvlogical_upto( ) return proc.stdout - def wait_for_slot_catchup(self, slot_name, mode="restart", target_lsn=None): + def wait_for_slot_catchup( + self, slot_name, mode=SlotCatchupMode.RESTART, target_lsn=None + ): """Wait until slot_name's _lsn passes target_lsn. Mirrors Cluster->wait_for_slot_catchup. 
mode is 'restart' or @@ -753,6 +756,7 @@ def wait_for_slot_catchup(self, slot_name, mode="restart", target_lsn=None): """ if mode not in ("restart", "confirmed_flush"): raise ValueError("valid modes are restart, confirmed_flush") + mode = SlotCatchupMode(mode).value if target_lsn is None: raise ValueError("target lsn must be specified") assert self.poll_query_until( @@ -1267,14 +1271,16 @@ def append_conf(self, text, filename="postgresql.conf"): append_to_file(self.datadir / filename, text + "\n") def adjust_conf( - self, setting, value, filename="postgresql.conf", skip_equals=False + self, setting, value, *, filename="postgresql.conf", skip_equals=False ): """Rewrite a config file, replacing or removing a setting in place. Mirrors PostgreSQL::Test::Cluster->adjust_conf: every line that sets `setting` is dropped; if `value` is not None a single new line setting it is written in its place (other lines preserved). The file mode is - reset to match the data dir's group accessibility. + reset to match the data dir's group accessibility. `filename` and + `skip_equals` are keyword-only so the (setting, value) order is + unambiguous. """ conffile = self.datadir / filename eq = "" if skip_equals else "= " @@ -1537,18 +1543,21 @@ def clean_node(self): if self.datadir.exists(): shutil.rmtree(self.datadir) - def restart(self, mode="fast", fail_ok=False, log_like=None, log_unlike=None): + def restart( + self, mode=StopMode.FAST, fail_ok=False, log_like=None, log_unlike=None + ): """Restart the server via pg_ctl restart and refresh the postmaster PID. - Mirrors PostgreSQL::Test::Cluster->restart. With fail_ok=True a failed - restart returns False (1 in Perl maps to True here for success) instead - of raising, and log_like/log_unlike (lists of regexes) are asserted - against the log emitted during the restart attempt. Returns True on a - successful restart, False on failure (only when fail_ok). + Mirrors PostgreSQL::Test::Cluster->restart. 
*mode* is a StopMode (or the + equivalent string). With fail_ok=True a failed restart returns False + instead of raising, and log_like/log_unlike (lists of regexes) are + asserted against the log emitted during the restart attempt. Returns + True on a successful restart, False on failure (only when fail_ok). """ + mode = StopMode(mode) offset = self.current_log_position() try: - self.pg_ctl("restart", "--mode", mode) + self.pg_ctl("restart", "--mode", mode.value) except subprocess.CalledProcessError as exc: if not fail_ok: raise PgServerError( @@ -1612,7 +1621,7 @@ def wait_for_replay_catchup(self, standby, node=None): source = node if node is not None else self self.wait_for_catchup(standby, "replay", source.lsn("flush")) - def wait_for_catchup(self, standby, mode="replay", target_lsn=None): + def wait_for_catchup(self, standby, mode=CatchupMode.REPLAY, target_lsn=None): """ Wait until a standby has caught up to target_lsn (default: this node's current write/replay LSN), by polling pg_stat_replication. Mirrors @@ -1623,6 +1632,7 @@ def wait_for_catchup(self, standby, mode="replay", target_lsn=None): valid_modes = ("sent", "write", "flush", "replay") if mode not in valid_modes: raise ValueError("unknown mode {!r} for wait_for_catchup".format(mode)) + mode = CatchupMode(mode).value standby_name = standby.name if isinstance(standby, PostgresServer) else standby @@ -1900,14 +1910,16 @@ def subcontext(self): self._cleanup_stack.__exit__(None, None, None) self._cleanup_stack = old_stack - def stop(self, mode="fast"): + def stop(self, mode=StopMode.FAST): """ Stop the PostgreSQL server instance. - Ignores failures if the server is already stopped. + *mode* is a StopMode (or the equivalent string). Ignores failures if the + server is already stopped. 
""" + mode = StopMode(mode) try: - self.pg_ctl("stop", "--mode", mode) + self.pg_ctl("stop", "--mode", mode.value) except subprocess.CalledProcessError: # Server may have already been stopped pass From 65155f0e4140a3e7834b6b6594018399816447d4 Mon Sep 17 00:00:00 2001 From: Greg Burd Date: Sun, 14 Jun 2026 09:37:24 -0400 Subject: [PATCH 25/36] pytest: pathlib paths and one text-capture subprocess chokepoint Replace the residual os.path.join/os.path.isdir in server.py with pathlib (the datadir and bindir are already Path objects), and funnel the five duplicated subprocess.run(..., stdout=PIPE, stderr=PIPE, encoding="utf-8", errors="replace", ...) blocks (pg_recvlogical, psql_capture, safe_psql/sql, config_data, poll_query_until) through one private _run_text() helper. The decoding and error policy now live in a single place. --- src/test/pytest/pypg/server.py | 71 ++++++++++++++++------------------ 1 file changed, 33 insertions(+), 38 deletions(-) diff --git a/src/test/pytest/pypg/server.py b/src/test/pytest/pypg/server.py index 2524bf7e9778e..44bcd69bc1709 100644 --- a/src/test/pytest/pypg/server.py +++ b/src/test/pytest/pypg/server.py @@ -430,7 +430,7 @@ def _init_datadir( initdb_template = os.environ.get("INITDB_TEMPLATE") if ( initdb_template - and os.path.isdir(initdb_template) + and pathlib.Path(initdb_template).is_dir() and not extra and not force_initdb ): @@ -550,7 +550,7 @@ def start(self, fail_ok=False): ) ) from exc # Read the PID file to get the postmaster PID - with open(os.path.join(self.datadir, "postmaster.pid"), encoding="utf-8") as f: + with open(self.datadir / "postmaster.pid", encoding="utf-8") as f: self.pid = int(f.readline().strip()) return True @@ -728,15 +728,10 @@ def pg_recvlogical_upto( if "=" in key: raise ValueError("= not permitted in replication option name") cmd += ["--option", "{}={}".format(key, value)] - proc = subprocess.run( + proc = self._run_text( cmd, - stdout=subprocess.PIPE, - stderr=subprocess.PIPE, - encoding="utf-8", - 
errors="replace", env=self._connenv(), timeout=timeout_secs, - check=False, ) if proc.returncode != 0: raise PgServerError( @@ -848,7 +843,7 @@ def start_new_test(self, remaining_timeout): def psql(self, *args): """Run psql with the given arguments.""" - self._run(os.path.join(self._bindir, "psql"), "-w", *args) + self._run(self._bindir / "psql", "-w", *args) def psql_capture( self, @@ -889,15 +884,10 @@ def psql_capture( cmd += ["--set", "ON_ERROR_STOP=1"] if extra_params: cmd += [str(p) for p in extra_params] - proc = subprocess.run( + proc = self._run_text( cmd, input=query, - stdout=subprocess.PIPE, - stderr=subprocess.PIPE, - encoding="utf-8", - errors="replace", env=self._connenv(), - check=False, timeout=timeout, ) # Match Cluster->psql, which chomps a single trailing newline off each. @@ -946,15 +936,10 @@ def _psql_text( "--dbname", connstr, ] - proc = subprocess.run( + proc = self._run_text( cmd, input=query, - stdout=subprocess.PIPE, - stderr=subprocess.PIPE, - encoding="utf-8", - errors="replace", env=self._connenv(**(extra_env or {})), - check=False, timeout=timeout, ) if proc.returncode != 0: @@ -1031,14 +1016,7 @@ def config_data(self, *args): stripped); with no arguments the full pg_config output is returned. 
""" cmd = [str(self._bindir / "pg_config")] + [str(a) for a in args] - proc = subprocess.run( - cmd, - stdout=subprocess.PIPE, - stderr=subprocess.PIPE, - encoding="utf-8", - errors="replace", - check=True, - ) + proc = self._run_text(cmd, check=True) return proc.stdout.rstrip("\n") def _check_log_patterns(self, test_name, offset, log_like, log_unlike): @@ -1213,15 +1191,10 @@ def poll_query_until(self, query, expected="t", dbname="postgres"): max_attempts = 10 * test_timeout_default() stdout = stderr = "" for _ in range(max_attempts): - proc = subprocess.run( + proc = self._run_text( cmd, input=query, - stdout=subprocess.PIPE, - stderr=subprocess.PIPE, - encoding="utf-8", - errors="replace", env=self._connenv(), - check=False, ) stdout = proc.stdout.strip() stderr = proc.stderr.strip() @@ -1565,7 +1538,7 @@ def restart( ) from exc self._check_log_patterns("restart", offset, log_like, log_unlike) return False - with open(os.path.join(self.datadir, "postmaster.pid"), encoding="utf-8") as f: + with open(self.datadir / "postmaster.pid", encoding="utf-8") as f: self.pid = int(f.readline().strip()) self._check_log_patterns("restart", offset, log_like, log_unlike) return True @@ -1821,6 +1794,28 @@ def _run(self, cmd, *args, addenv: Optional[dict] = None): subenv = self._connenv(**(addenv or {})) run(cmd, *args, env=subenv) + @staticmethod + def _run_text(cmd, *, input=None, env=None, timeout=None, check=False): + """Run *cmd*, capturing decoded stdout/stderr; the framework's single + text-capturing subprocess call. + + Returns the completed process (with str .stdout/.stderr). Decoding uses + utf-8 with replacement so a stray non-UTF8 byte never crashes capture. + With check=True a nonzero exit raises CalledProcessError. 
+ """ + # pylint: disable=redefined-builtin # 'input' matches subprocess.run + return subprocess.run( + cmd, + input=input, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + encoding="utf-8", + errors="replace", + env=env, + timeout=timeout, + check=check, + ) + def create_users(self, *userkeys: str): """Create test users and register them for cleanup.""" usermap = {} @@ -1934,7 +1929,7 @@ def signal_backend(self, pid, signame): Co-authored-by: Andrew Dunstan """ - self._run(os.path.join(self._bindir, "pg_ctl"), "kill", signame, str(pid)) + self._run(self._bindir / "pg_ctl", "kill", signame, str(pid)) def kill9(self): """Hard-kill the postmaster (cf. PostgreSQL::Test::Cluster->kill9). @@ -1945,7 +1940,7 @@ def kill9(self): Co-authored-by: Andrew Dunstan """ - pidfile = os.path.join(self.datadir, "postmaster.pid") + pidfile = self.datadir / "postmaster.pid" try: with open(pidfile, encoding="utf-8") as fh: pid = int(fh.readline().strip()) From cf3fc98c3e315cfede911844ef4db035eb882043 Mon Sep 17 00:00:00 2001 From: Greg Burd Date: Sun, 14 Jun 2026 09:40:28 -0400 Subject: [PATCH 26/36] pytest: use ProgramResult.exit_code instead of the cryptic .rc in tests Mechanical rename of the result-object .rc accessor to .exit_code across all 42 test files that read it. Pure rename; .rc remains a property alias on ProgramResult so nothing breaks, but the tests now read in full words. 
--- contrib/amcheck/pyt/test_002_cic.py | 2 +- contrib/amcheck/pyt/test_003_cic_2pc.py | 8 ++++---- contrib/amcheck/pyt/test_005_pitr.py | 4 ++-- contrib/dblink/pyt/test_001_auth_scram.py | 12 ++++++----- .../postgres_fdw/pyt/test_001_auth_scram.py | 6 +++--- .../libpq-oauth/pyt/test_001_oauth.py | 2 +- src/interfaces/libpq/pyt/test_001_uri.py | 2 +- .../pyt/test_005_negotiate_encryption.py | 2 +- .../authentication/pyt/test_001_password.py | 6 +++--- .../pyt/test_006_login_trigger.py | 4 +++- src/test/icu/pyt/test_010_database.py | 6 +++--- src/test/kerberos/pyt/test_001_auth.py | 4 ++-- .../brin/pyt/test_02_wal_consistency.py | 2 +- .../modules/commit_ts/pyt/test_002_standby.py | 2 +- .../commit_ts/pyt/test_003_standby_2.py | 4 +++- .../modules/commit_ts/pyt/test_004_restart.py | 10 +++++----- .../oauth_validator/pyt/test_001_server.py | 4 +++- .../pyt/test_001_testfunc.py | 2 +- .../test_cloexec/pyt/test_001_cloexec.py | 2 +- .../test_escape/pyt/test_001_test_escape.py | 2 +- .../pyt/test_001_extension_control_path.py | 2 +- .../pyt/test_004_test_parser_perf.py | 4 ++-- .../pyt/test_001_constraint_validation.py | 2 +- .../test_misc/pyt/test_002_tablespace.py | 4 ++-- .../test_misc/pyt/test_012_ddlutils.py | 18 ++++++++--------- .../xid_wraparound/pyt/test_002_limits.py | 2 +- src/test/recovery/pyt/test_001_stream_rep.py | 20 +++++++++---------- .../recovery/pyt/test_003_recovery_targets.py | 2 +- .../recovery/pyt/test_006_logical_decoding.py | 8 ++++---- src/test/recovery/pyt/test_009_twophase.py | 2 +- .../test_010_logical_decoding_timelines.py | 6 +++--- .../recovery/pyt/test_012_subtransactions.py | 4 ++-- .../recovery/pyt/test_018_wal_optimize.py | 2 +- .../recovery/pyt/test_020_archive_status.py | 2 +- .../recovery/pyt/test_027_stream_regress.py | 2 +- .../recovery/pyt/test_037_invalid_database.py | 16 +++++++++------ .../pyt/test_041_checkpoint_at_promote.py | 2 +- .../test_053_standby_login_event_trigger.py | 2 +- src/test/ssl/pyt/test_002_scram.py | 2 +- 
src/test/ssl/pyt/test_003_sslinfo.py | 2 +- .../subscription/pyt/test_027_nosuperuser.py | 4 ++-- src/test/subscription/pyt/test_100_bugs.py | 8 ++++---- 42 files changed, 107 insertions(+), 95 deletions(-) diff --git a/contrib/amcheck/pyt/test_002_cic.py b/contrib/amcheck/pyt/test_002_cic.py index 6c7f6125d52af..d33b20e16e6fa 100644 --- a/contrib/amcheck/pyt/test_002_cic.py +++ b/contrib/amcheck/pyt/test_002_cic.py @@ -60,6 +60,6 @@ def test_002_cic(create_pg): result = node.psql_capture( "SELECT bt_index_parent_check('oscar', heapallindexed => true)" ) - assert result.rc == 0, "bt_index_parent_check for CIC after removed row" + assert result.exit_code == 0, "bt_index_parent_check for CIC after removed row" in_progress_h.quit() node.stop() diff --git a/contrib/amcheck/pyt/test_003_cic_2pc.py b/contrib/amcheck/pyt/test_003_cic_2pc.py index b45d59af631b3..6ea020a819672 100644 --- a/contrib/amcheck/pyt/test_003_cic_2pc.py +++ b/contrib/amcheck/pyt/test_003_cic_2pc.py @@ -44,10 +44,10 @@ def test_003_cic_2pc(create_pg): node.safe_psql("CREATE TABLE tbl(i int, j jsonb)") _build_indexes_with_2pc(node) assert ( - node.psql_capture("SELECT bt_index_check('idx',true)").rc == 0 + node.psql_capture("SELECT bt_index_check('idx',true)").exit_code == 0 ), "bt_index_check after overlapping 2PC" assert ( - node.psql_capture("SELECT gin_index_check('ginidx')").rc == 0 + node.psql_capture("SELECT gin_index_check('ginidx')").exit_code == 0 ), "gin_index_check after overlapping 2PC" node.safe_psql( "BEGIN;\nINSERT INTO tbl VALUES(0, " @@ -67,10 +67,10 @@ def test_003_cic_2pc(create_pg): node.safe_psql("COMMIT PREPARED 'spans_restart'") reindex_h.quit() assert ( - node.psql_capture("SELECT bt_index_check('idx',true)").rc == 0 + node.psql_capture("SELECT bt_index_check('idx',true)").exit_code == 0 ), "bt_index_check after 2PC and restart" assert ( - node.psql_capture("SELECT gin_index_check('ginidx')").rc == 0 + node.psql_capture("SELECT gin_index_check('ginidx')").exit_code == 0 ), 
"gin_index_check after 2PC and restart" node.safe_psql("REINDEX TABLE tbl;") node.pgbench( diff --git a/contrib/amcheck/pyt/test_005_pitr.py b/contrib/amcheck/pyt/test_005_pitr.py index f1c55315d10cb..5c7791e95ce28 100644 --- a/contrib/amcheck/pyt/test_005_pitr.py +++ b/contrib/amcheck/pyt/test_005_pitr.py @@ -68,7 +68,7 @@ def test_005_pitr(create_pg): result = replica.psql_capture( "{}; SELECT bt_index_parent_check('not_leftmost_pk', true)".format(debug) ) - assert result.rc == 0, "bt_index_parent_check passes" + assert result.exit_code == 0, "bt_index_parent_check passes" assert ( "interrupted page deletion detected" in result.stderr ), "bt_index_parent_check: interrupted page deletion detected" @@ -76,4 +76,4 @@ def test_005_pitr(create_pg): result = replica.psql_capture( "{}; SELECT bt_index_check('not_leftmost_pk', true)".format(debug) ) - assert result.rc == 0, "bt_index_check passes" + assert result.exit_code == 0, "bt_index_check passes" diff --git a/contrib/dblink/pyt/test_001_auth_scram.py b/contrib/dblink/pyt/test_001_auth_scram.py index cf8decd87f79d..5139a7c0c6d6c 100644 --- a/contrib/dblink/pyt/test_001_auth_scram.py +++ b/contrib/dblink/pyt/test_001_auth_scram.py @@ -137,7 +137,9 @@ def _test_scram_keys_not_overwritten(node, db, fdw): "(user '{user}', {opt} 'key');".format(user=_USER, fdw=fdw, opt=opt), connstr=_u_connstr(node, db), ) - assert res.rc == 3, "user mapping creation fails when using {}".format(opt) + assert res.exit_code == 3, "user mapping creation fails when using {}".format( + opt + ) assert re.search(r'ERROR: invalid option "{}"'.format(opt), res.stderr) @@ -146,7 +148,7 @@ def _test_invalid_overwritten_require_auth(node1, fdw): "select * from dblink('{}', 'select * from t') as t(a int, b int)".format(fdw), connstr=_u_connstr(node1, _DB0), ) - assert res.rc == 3, "loopback trust fails when overwriting require_auth" + assert res.exit_code == 3, "loopback trust fails when overwriting require_auth" assert re.search( r"password or 
GSSAPI delegated credentials required", res.stderr ), "expected error when connecting to a fdw overwriting the require_auth" @@ -163,7 +165,7 @@ def _test_disabled_passthrough(node1, fdw): "select * from dblink('{}', 'select * from t') as t(a int, b int)".format(fdw), connstr=connstr, ) - assert res.rc == 3, "SCRAM passthrough disabled on user mapping should fail" + assert res.exit_code == 3, "SCRAM passthrough disabled on user mapping should fail" assert re.search( r"password", res.stderr, re.IGNORECASE ), "expected password-related error when scram passthrough disabled" @@ -189,7 +191,7 @@ def _test_loopback_rejections(node1, node2, fdw_server, fdw_server2): ), connstr=_u_connstr(node1, _DB0), ) - assert res.rc == 3, "loopback trust fails on the same cluster" + assert res.exit_code == 3, "loopback trust fails on the same cluster" assert re.search( r'failed: authentication method requirement "scram-sha-256" failed: ' r"server did not complete authentication", @@ -201,7 +203,7 @@ def _test_loopback_rejections(node1, node2, fdw_server, fdw_server2): ), connstr=_u_connstr(node1, _DB0), ) - assert res.rc == 3, "loopback password fails on a different cluster" + assert res.exit_code == 3, "loopback password fails on a different cluster" assert re.search( r'authentication method requirement "scram-sha-256" failed: ' r"server requested a cleartext password", diff --git a/contrib/postgres_fdw/pyt/test_001_auth_scram.py b/contrib/postgres_fdw/pyt/test_001_auth_scram.py index 2a3052b2b2200..18f5599725464 100644 --- a/contrib/postgres_fdw/pyt/test_001_auth_scram.py +++ b/contrib/postgres_fdw/pyt/test_001_auth_scram.py @@ -151,7 +151,7 @@ def _passthrough_off(node1, db0, fdw3): res = node1.psql_capture( "SELECT count(1) FROM override_t", dbname=db0, connstr=connstr ) - assert res.rc == 3, "SCRAM passthrough disabled on user mapping should fail" + assert res.exit_code == 3, "SCRAM passthrough disabled on user mapping should fail" assert re.search( r"password", res.stderr, re.I 
), "expected password-related error when scram passthrough disabled" @@ -174,12 +174,12 @@ def _loopback_rejections(node1, node2, db0): node2.restart() connstr = node1.connstr(db0) + " user={}".format(USER) res = node1.psql_capture("select count(1) from t", dbname=db0, connstr=connstr) - assert res.rc == 3, "loopback trust fails on the same cluster" + assert res.exit_code == 3, "loopback trust fails on the same cluster" assert re.search( r'failed: authentication method requirement "scram-sha-256"', res.stderr ), "expected error from loopback trust (same cluster)" res = node1.psql_capture("select count(1) from t2", dbname=db0, connstr=connstr) - assert res.rc == 3, "loopback password fails on a different cluster" + assert res.exit_code == 3, "loopback password fails on a different cluster" assert re.search( r'failed: authentication method requirement "scram-sha-256"', res.stderr ), "expected error from loopback password (different cluster)" diff --git a/src/interfaces/libpq-oauth/pyt/test_001_oauth.py b/src/interfaces/libpq-oauth/pyt/test_001_oauth.py index 3dd414f9a7640..6b51fa89e3dfc 100644 --- a/src/interfaces/libpq-oauth/pyt/test_001_oauth.py +++ b/src/interfaces/libpq-oauth/pyt/test_001_oauth.py @@ -15,4 +15,4 @@ def test_001_oauth(pg_bin): result = pg_bin.result(["oauth_tests"]) sys.stdout.write(result.stdout) sys.stderr.write(result.stderr) - assert result.rc == 0, "oauth_tests returned {}".format(result.rc) + assert result.exit_code == 0, "oauth_tests returned {}".format(result.exit_code) diff --git a/src/interfaces/libpq/pyt/test_001_uri.py b/src/interfaces/libpq/pyt/test_001_uri.py index e8d25fc702729..690ed48acc284 100644 --- a/src/interfaces/libpq/pyt/test_001_uri.py +++ b/src/interfaces/libpq/pyt/test_001_uri.py @@ -296,7 +296,7 @@ def test_001_uri(pg_bin: PgBin): result = _run_uri(pg_bin, uri, envvars) expected_exit = 0 if expected_stderr == "" else 1 - actual_exit = 0 if result.rc == 0 else 1 + actual_exit = 0 if result.exit_code == 0 else 1 assert 
actual_exit == expected_exit, "{}: exit status".format(uri) assert result.stdout == expected_stdout, "{}: stdout".format(uri) assert result.stderr == expected_stderr, "{}: stderr".format(uri) diff --git a/src/interfaces/libpq/pyt/test_005_negotiate_encryption.py b/src/interfaces/libpq/pyt/test_005_negotiate_encryption.py index 31162f8a14cc6..20e6c34b91522 100644 --- a/src/interfaces/libpq/pyt/test_005_negotiate_encryption.py +++ b/src/interfaces/libpq/pyt/test_005_negotiate_encryption.py @@ -212,7 +212,7 @@ def _connect_test(env, connstr, expected_events_and_outcome): on_error_stop=False, extra_params=["--no-password", "--command", "SELECT current_enc()"], ) - outcome = result.stdout if result.rc == 0 else "fail" + outcome = result.stdout if result.exit_code == 0 else "fail" log_contents = slurp_file(node.log, log_location) events = _parse_log_events(log_contents) diff --git a/src/test/authentication/pyt/test_001_password.py b/src/test/authentication/pyt/test_001_password.py index 45f78995ce8eb..c8b336ab2ebee 100644 --- a/src/test/authentication/pyt/test_001_password.py +++ b/src/test/authentication/pyt/test_001_password.py @@ -151,7 +151,7 @@ def _create_password_roles(node, md5_works): node.psql_capture( "SET password_encryption='scram-sha-256'; " "CREATE ROLE scram_role LOGIN PASSWORD 'pass';" - ).rc + ).exit_code == 0 ), "created user with SCRAM password" expected_md5_rc = 0 if md5_works else 3 @@ -159,7 +159,7 @@ def _create_password_roles(node, md5_works): node.psql_capture( "SET password_encryption='md5'; " "CREATE ROLE md5_role LOGIN PASSWORD 'pass';" - ).rc + ).exit_code == expected_md5_rc ), "created user with md5 password" @@ -843,7 +843,7 @@ def test_001_password(create_pg, tmp_path): _test_log_connections(node) # md5 could fail in FIPS mode. 
- md5_works = node.psql_capture("select md5('')").rc == 0 + md5_works = node.psql_capture("select md5('')").exit_code == 0 _create_password_roles(node, md5_works) _test_password_command(node) diff --git a/src/test/authentication/pyt/test_006_login_trigger.py b/src/test/authentication/pyt/test_006_login_trigger.py index 2c25e91f0af32..7c4f7d2982a0c 100644 --- a/src/test/authentication/pyt/test_006_login_trigger.py +++ b/src/test/authentication/pyt/test_006_login_trigger.py @@ -30,7 +30,9 @@ def _psql_command( err_exact=None, ): res = node.psql_capture(sql, connstr=connstr, on_error_stop=False) - assert res.rc == expected_ret, "{}: exit code {}".format(test_name, expected_ret) + assert res.exit_code == expected_ret, "{}: exit code {}".format( + test_name, expected_ret + ) out, err = res.stdout, res.stderr for rx in log_like or []: assert re.search(rx, out), "{}: log matches".format(test_name) diff --git a/src/test/icu/pyt/test_010_database.py b/src/test/icu/pyt/test_010_database.py index b57add4c18939..2216f514c3892 100644 --- a/src/test/icu/pyt/test_010_database.py +++ b/src/test/icu/pyt/test_010_database.py @@ -48,19 +48,19 @@ def test_010_database(create_pg): assert ( node1.psql_capture( "CREATE DATABASE dbicu1 LOCALE_PROVIDER icu LOCALE 'C' TEMPLATE template0 ENCODING UTF8" - ).rc + ).exit_code == 0 ), "C locale works for ICU" assert ( node1.psql_capture( "CREATE DATABASE dbicu2 LOCALE_PROVIDER icu LOCALE '@colStrength=primary'\n LC_COLLATE='C' LC_CTYPE='C' TEMPLATE template0 ENCODING UTF8" - ).rc + ).exit_code == 0 ), "LOCALE works for ICU locales if LC_COLLATE and LC_CTYPE are specified" result = node1.psql_capture( "CREATE DATABASE dbicu3 LOCALE_PROVIDER builtin LOCALE 'C' TEMPLATE dbicu" ) - assert result.rc != 0, "locale provider must match template: exit code not 0" + assert result.exit_code != 0, "locale provider must match template: exit code not 0" assert re.search( r"""ERROR: new locale provider \(builtin\) does not match locale provider of the template 
database \(icu\)""", result.stderr, diff --git a/src/test/kerberos/pyt/test_001_auth.py b/src/test/kerberos/pyt/test_001_auth.py index ac1f34797f1de..d69f4412bdcbf 100644 --- a/src/test/kerberos/pyt/test_001_auth.py +++ b/src/test/kerberos/pyt/test_001_auth.py @@ -420,8 +420,8 @@ def _delegation_fails(node, sql, connstr, msg): stdout. """ result = node.psql_capture(sql, connstr=connstr) - assert result.rc == 3, "{}: expected exit 3, got {}\n{}".format( - msg, result.rc, result.stderr + assert result.exit_code == 3, "{}: expected exit 3, got {}\n{}".format( + msg, result.exit_code, result.stderr ) assert re.search( r"password or GSSAPI delegated credentials required", result.stderr diff --git a/src/test/modules/brin/pyt/test_02_wal_consistency.py b/src/test/modules/brin/pyt/test_02_wal_consistency.py index beed6f7868f9a..ccbc07ae522ae 100644 --- a/src/test/modules/brin/pyt/test_02_wal_consistency.py +++ b/src/test/modules/brin/pyt/test_02_wal_consistency.py @@ -18,7 +18,7 @@ def test_02_wal_consistency(create_pg): assert ( whiskey.psql_capture( "SELECT pg_create_physical_replication_slot('standby_1');" - ).rc + ).exit_code == 0 ), "physical slot created on primary" backup_name = "brinbkp" diff --git a/src/test/modules/commit_ts/pyt/test_002_standby.py b/src/test/modules/commit_ts/pyt/test_002_standby.py index 49263172b800e..d5c6c5dfe0a6d 100644 --- a/src/test/modules/commit_ts/pyt/test_002_standby.py +++ b/src/test/modules/commit_ts/pyt/test_002_standby.py @@ -45,7 +45,7 @@ def test_002_standby(create_pg): result = standby.psql_capture( "select ts.* from pg_class, pg_xact_commit_timestamp(xmin) ts where relname = 't10'" ) - assert result.rc == 3, "standby errors when primary turned feature off" + assert result.exit_code == 3, "standby errors when primary turned feature off" assert result.stdout == "", "standby gives no value when primary turned feature off" assert re.search( r"""could not get commit timestamp data""", diff --git 
a/src/test/modules/commit_ts/pyt/test_003_standby_2.py b/src/test/modules/commit_ts/pyt/test_003_standby_2.py index edb0137d03303..110ffa3e1298a 100644 --- a/src/test/modules/commit_ts/pyt/test_003_standby_2.py +++ b/src/test/modules/commit_ts/pyt/test_003_standby_2.py @@ -35,7 +35,9 @@ def test_003_standby_2(create_pg): result = standby.psql_capture( "SELECT ts.* FROM pg_class, pg_xact_commit_timestamp(xmin) AS ts WHERE relname = 't10'" ) - assert result.rc == 3, "expect error when getting commit timestamp after restart" + assert ( + result.exit_code == 3 + ), "expect error when getting commit timestamp after restart" assert result.stdout == "", "standby does not return a value after restart" assert re.search( r"""could not get commit timestamp data""", diff --git a/src/test/modules/commit_ts/pyt/test_004_restart.py b/src/test/modules/commit_ts/pyt/test_004_restart.py index 680aca49b97f4..9c7f7ad85297c 100644 --- a/src/test/modules/commit_ts/pyt/test_004_restart.py +++ b/src/test/modules/commit_ts/pyt/test_004_restart.py @@ -18,16 +18,16 @@ def test_004_restart(create_pg): node_primary.append_conf("track_commit_timestamp = on") node_primary.start() result = node_primary.psql_capture("SELECT pg_xact_commit_timestamp('0');") - assert result.rc == 3, "getting ts of InvalidTransactionId reports error" + assert result.exit_code == 3, "getting ts of InvalidTransactionId reports error" assert re.search( r"""cannot retrieve commit timestamp for transaction""", result.stderr, ), "expected error from InvalidTransactionId" result = node_primary.psql_capture("SELECT pg_xact_commit_timestamp('1');") - assert result.rc == 0, "getting ts of BootstrapTransactionId succeeds" + assert result.exit_code == 0, "getting ts of BootstrapTransactionId succeeds" assert result.stdout == "", "timestamp of BootstrapTransactionId is null" result = node_primary.psql_capture("SELECT pg_xact_commit_timestamp('2');") - assert result.rc == 0, "getting ts of FrozenTransactionId succeeds" + assert 
result.exit_code == 0, "getting ts of FrozenTransactionId succeeds" assert result.stdout == "", "timestamp of FrozenTransactionId is null" assert ( node_primary.safe_psql("SELECT pg_xact_commit_timestamp('3');") == "" @@ -71,7 +71,7 @@ def test_004_restart(create_pg): result = node_primary.psql_capture( "SELECT pg_xact_commit_timestamp('" + str(xid) + "');" ) - assert result.rc == 3, "no commit timestamp from enable tx when cts disabled" + assert result.exit_code == 3, "no commit timestamp from enable tx when cts disabled" assert re.search( r"""could not get commit timestamp data""", result.stderr, @@ -82,7 +82,7 @@ def test_004_restart(create_pg): result = node_primary.psql_capture( "SELECT pg_xact_commit_timestamp('" + str(xid_disabled) + "');" ) - assert result.rc == 3, "no commit timestamp when disabled" + assert result.exit_code == 3, "no commit timestamp when disabled" assert re.search( r"""could not get commit timestamp data""", result.stderr, diff --git a/src/test/modules/oauth_validator/pyt/test_001_server.py b/src/test/modules/oauth_validator/pyt/test_001_server.py index dfd87c4786898..a62aec1607edb 100644 --- a/src/test/modules/oauth_validator/pyt/test_001_server.py +++ b/src/test/modules/oauth_validator/pyt/test_001_server.py @@ -502,7 +502,9 @@ def _phase_call_count(node, common): connstr=_encode_connstr(common, stage="token", retries=2), on_error_stop=False, ) - assert result.rc == 0, "call count connection succeeds\n{}".format(result.stderr) + assert result.exit_code == 0, "call count connection succeeds\n{}".format( + result.stderr + ) assert re.search(_VISIT, result.stderr), "call count: stderr matches" match = re.search(r"\[libpq\] total number of polls: (\d+)", result.stderr) diff --git a/src/test/modules/ssl_passphrase_callback/pyt/test_001_testfunc.py b/src/test/modules/ssl_passphrase_callback/pyt/test_001_testfunc.py index 2352c51a8cbb8..f6b00bb69ea97 100644 --- a/src/test/modules/ssl_passphrase_callback/pyt/test_001_testfunc.py +++ 
b/src/test/modules/ssl_passphrase_callback/pyt/test_001_testfunc.py @@ -51,7 +51,7 @@ def test_001_testfunc(create_pg): result = node.bin.run_command( ["pg_ctl", "--pgdata", str(node.datadir), "--log", str(node.log), "start"] ) - assert result.rc != 0, "pg_ctl fails with bad passphrase" + assert result.exit_code != 0, "pg_ctl fails with bad passphrase" assert not os.path.exists( "{}/postmaster.pid".format(ddir) ), "postgres not started with bad passphrase" diff --git a/src/test/modules/test_cloexec/pyt/test_001_cloexec.py b/src/test/modules/test_cloexec/pyt/test_001_cloexec.py index c6868428ac63c..9172b3147300e 100644 --- a/src/test/modules/test_cloexec/pyt/test_001_cloexec.py +++ b/src/test/modules/test_cloexec/pyt/test_001_cloexec.py @@ -17,6 +17,6 @@ def test_001_cloexec(pg_bin): if platform.system() != "Windows": pytest.skip("test is Windows-specific") result = pg_bin.result(["test_cloexec"]) - assert result.rc == 0 and re.search( + assert result.exit_code == 0 and re.search( r"SUCCESS.*O_CLOEXEC behavior verified", result.stdout, re.DOTALL ), "O_CLOEXEC prevents handle inheritance" diff --git a/src/test/modules/test_escape/pyt/test_001_test_escape.py b/src/test/modules/test_escape/pyt/test_001_test_escape.py index ce8aa2964554b..d958dd7db86f6 100644 --- a/src/test/modules/test_escape/pyt/test_001_test_escape.py +++ b/src/test/modules/test_escape/pyt/test_001_test_escape.py @@ -20,7 +20,7 @@ def test_001_test_escape(create_pg, pg_bin): ) conninfo = node.connstr() + " dbname=db_sql_ascii" result = pg_bin.result(["test_escape", "--conninfo", conninfo]) - assert result.rc == 0, "test_escape returns 0" + assert result.exit_code == 0, "test_escape returns 0" assert result.stderr == "", "test_escape stderr is empty" for line in result.stdout.split("\n"): if re.match(r"^ok \d+ ?(.*)", line): diff --git a/src/test/modules/test_extensions/pyt/test_001_extension_control_path.py b/src/test/modules/test_extensions/pyt/test_001_extension_control_path.py index 
67875bedff05f..1e3b39efd777b 100644 --- a/src/test/modules/test_extensions/pyt/test_001_extension_control_path.py +++ b/src/test/modules/test_extensions/pyt/test_001_extension_control_path.py @@ -133,5 +133,5 @@ def test_001_extension_control_path(create_pg): "empty extension_control_path" ) res = node.psql_capture("CREATE EXTENSION invalid") - assert res.rc == 3, "error creating an extension that does not exist" + assert res.exit_code == 3, "error creating an extension that does not exist" assert re.search(r'ERROR: extension "invalid" is not available', res.stderr) diff --git a/src/test/modules/test_json_parser/pyt/test_004_test_parser_perf.py b/src/test/modules/test_json_parser/pyt/test_004_test_parser_perf.py index 4c578a0943e83..35691357c5b56 100644 --- a/src/test/modules/test_json_parser/pyt/test_004_test_parser_perf.py +++ b/src/test/modules/test_json_parser/pyt/test_004_test_parser_perf.py @@ -19,6 +19,6 @@ def test_004_test_parser_perf(pg_bin, tmp_path): fname = tmp_path / "perf.json" fname.write_text("[" + contents + ("," + contents) * 49 + "]", encoding="utf-8") result = pg_bin.result(["test_json_parser_perf", "1", str(fname)]) - assert result.rc == 0, "perf test runs with recursive descent parser" + assert result.exit_code == 0, "perf test runs with recursive descent parser" result = pg_bin.result(["test_json_parser_perf", "-i", "1", str(fname)]) - assert result.rc == 0, "perf test runs with table driven parser" + assert result.exit_code == 0, "perf test runs with table driven parser" diff --git a/src/test/modules/test_misc/pyt/test_001_constraint_validation.py b/src/test/modules/test_misc/pyt/test_001_constraint_validation.py index 2a3c3414bb915..60e03365f18af 100644 --- a/src/test/modules/test_misc/pyt/test_001_constraint_validation.py +++ b/src/test/modules/test_misc/pyt/test_001_constraint_validation.py @@ -27,7 +27,7 @@ def run_sql_command(sql): """Run a SQL command and return psql's stderr (including debug messages), mirroring the Perl 
run_sql_command sub (on_error_die).""" result = node.psql_capture(sql, on_error_stop=True) - assert result.rc == 0, "psql failed: {}".format(result.stderr) + assert result.exit_code == 0, "psql failed: {}".format(result.stderr) return result.stderr def is_table_verified(output): diff --git a/src/test/modules/test_misc/pyt/test_002_tablespace.py b/src/test/modules/test_misc/pyt/test_002_tablespace.py index 344b1f70af533..c7c931b779612 100644 --- a/src/test/modules/test_misc/pyt/test_002_tablespace.py +++ b/src/test/modules/test_misc/pyt/test_002_tablespace.py @@ -12,11 +12,11 @@ def _ok(node, sql, msg): - assert node.psql_capture(sql).rc == 0, msg + assert node.psql_capture(sql).exit_code == 0, msg def _fail(node, sql, msg): - assert node.psql_capture(sql).rc != 0, msg + assert node.psql_capture(sql).exit_code != 0, msg def test_002_tablespace(create_pg): diff --git a/src/test/modules/test_misc/pyt/test_012_ddlutils.py b/src/test/modules/test_misc/pyt/test_012_ddlutils.py index 2800238d5ccc1..4e1dbce866536 100644 --- a/src/test/modules/test_misc/pyt/test_012_ddlutils.py +++ b/src/test/modules/test_misc/pyt/test_012_ddlutils.py @@ -145,7 +145,7 @@ def unlike(text, pattern, _msg): # Non-existent role (should error) res = node.psql_capture("SELECT * FROM pg_get_role_ddl(9999999::oid)") - assert res.rc != 0, "non-existent role errors" + assert res.exit_code != 0, "non-existent role errors" like(res.stderr, r"does not exist", "non-existent role error message") # NULL input (should return no rows) @@ -162,7 +162,7 @@ def unlike(text, pattern, _msg): "SET ROLE regress_role_ddl_noaccess;\n" "\t SELECT * FROM pg_get_role_ddl('regress_role_ddl_test1')" ) - assert res.rc != 0, "role DDL denied without pg_authid access" + assert res.exit_code != 0, "role DDL denied without pg_authid access" node.safe_psql( """ \tGRANT SELECT ON pg_authid TO PUBLIC""" @@ -186,7 +186,7 @@ def unlike(text, pattern, _msg): res = node.psql_capture( "SELECT * FROM 
pg_get_database_ddl('regression_no_such_db')" ) - assert res.rc != 0, "non-existent database errors" + assert res.exit_code != 0, "non-existent database errors" # NULL input result = node.safe_psql("SELECT count(*) FROM pg_get_database_ddl(NULL)") @@ -196,7 +196,7 @@ def unlike(text, pattern, _msg): res = node.psql_capture( "SELECT * FROM pg_get_database_ddl('regression_ddlutils_test', 'owner', 'invalid')" ) - assert res.rc != 0, "invalid boolean option errors" + assert res.exit_code != 0, "invalid boolean option errors" like(res.stderr, r"invalid value", "invalid option error message") # Duplicate option @@ -204,7 +204,7 @@ def unlike(text, pattern, _msg): "SELECT * FROM pg_get_database_ddl('regression_ddlutils_test',\n" "\t 'owner', 'false', 'owner', 'true')" ) - assert res.rc != 0, "duplicate option errors" + assert res.exit_code != 0, "duplicate option errors" # Basic output (without locale details) result = _ddl_filter( @@ -251,7 +251,7 @@ def unlike(text, pattern, _msg): "SET ROLE regress_role_ddl_noaccess;\n" "\t SELECT * FROM pg_get_database_ddl('regression_ddlutils_test')" ) - assert res.rc != 0, "database DDL denied without CONNECT" + assert res.exit_code != 0, "database DDL denied without CONNECT" node.safe_psql( """ \tGRANT CONNECT ON DATABASE regression_ddlutils_test TO PUBLIC""" @@ -265,11 +265,11 @@ def unlike(text, pattern, _msg): res = node.psql_capture( "SELECT * FROM pg_get_tablespace_ddl('regress_nonexistent_tblsp')" ) - assert res.rc != 0, "non-existent tablespace errors" + assert res.exit_code != 0, "non-existent tablespace errors" # Non-existent tablespace by OID res = node.psql_capture("SELECT * FROM pg_get_tablespace_ddl(0::oid)") - assert res.rc != 0, "non-existent tablespace OID errors" + assert res.exit_code != 0, "non-existent tablespace OID errors" # NULL input (name and OID variants) result = node.safe_psql("SELECT count(*) FROM pg_get_tablespace_ddl(NULL::name)") @@ -346,7 +346,7 @@ def unlike(text, pattern, _msg): "SET ROLE 
regress_role_ddl_noaccess;\n" "\t SELECT * FROM pg_get_tablespace_ddl('regress_allopt_tblsp')" ) - assert res.rc != 0, "tablespace DDL denied without pg_tablespace access" + assert res.exit_code != 0, "tablespace DDL denied without pg_tablespace access" node.safe_psql( """ \tGRANT SELECT ON pg_tablespace TO PUBLIC""" diff --git a/src/test/modules/xid_wraparound/pyt/test_002_limits.py b/src/test/modules/xid_wraparound/pyt/test_002_limits.py index 5a69b229c7015..4f13554c814a6 100644 --- a/src/test/modules/xid_wraparound/pyt/test_002_limits.py +++ b/src/test/modules/xid_wraparound/pyt/test_002_limits.py @@ -41,7 +41,7 @@ def test_002_limits(create_pg): warn_limit = 0 for _ in range(1, 16): res = node.psql_capture("SELECT consume_xids(10000000)") - assert res.rc == 0 # on_error_die => 1 + assert res.exit_code == 0 # on_error_die => 1 if re.search( r'WARNING: database "postgres" must be vacuumed within [0-9]+ transactions', res.stderr, diff --git a/src/test/recovery/pyt/test_001_stream_rep.py b/src/test/recovery/pyt/test_001_stream_rep.py index 1e165b1de2e67..1532d3de145fa 100644 --- a/src/test/recovery/pyt/test_001_stream_rep.py +++ b/src/test/recovery/pyt/test_001_stream_rep.py @@ -88,10 +88,10 @@ def _check_sequences(primary, standby1, standby2): == "t" ), "pg_sequence_last_value() on unlogged sequence on standby 1" assert ( - standby1.psql_capture("INSERT INTO tab_int VALUES (1)").rc == 3 + standby1.psql_capture("INSERT INTO tab_int VALUES (1)").exit_code == 3 ), "read-only queries on standby 1" assert ( - standby2.psql_capture("INSERT INTO tab_int VALUES (1)").rc == 3 + standby2.psql_capture("INSERT INTO tab_int VALUES (1)").exit_code == 3 ), "read-only queries on standby 2" @@ -106,14 +106,14 @@ def _tsa(node1, node2, target, mode, status): on_error_stop=False, ) if status == 0: - assert res.rc == 0 and res.stdout.strip() == str( + assert res.exit_code == 0 and res.stdout.strip() == str( target.port ), 'connect with mode "{}" and {},{} listed'.format( mode, 
node1.name, node2.name ) else: assert ( - res.rc == status and target is None + res.exit_code == status and target is None ), 'fail to connect with mode "{}"'.format(mode) @@ -148,13 +148,13 @@ def _show_and_read_slot(primary): for connstr, label in ((rep, "physical"), (db, "logical")): for sql in ("SHOW ALL;", "SHOW work_mem;", "SHOW primary_conninfo;"): assert ( - primary.psql_capture(sql, connstr=connstr).rc == 0 + primary.psql_capture(sql, connstr=connstr).exit_code == 0 ), "{} over {} replication".format(sql, label) slotname = "test_read_replication_slot_physical" res = primary.psql_capture( "READ_REPLICATION_SLOT non_existent_slot;", connstr=rep, on_error_stop=False ) - assert res.rc == 0, "READ_REPLICATION_SLOT exit code 0 on success" + assert res.exit_code == 0, "READ_REPLICATION_SLOT exit code 0 on success" assert re.search( r"^\|\|$", res.stdout.strip(), re.M ), "READ_REPLICATION_SLOT returns NULL values if slot does not exist" @@ -164,7 +164,7 @@ def _show_and_read_slot(primary): res = primary.psql_capture( "READ_REPLICATION_SLOT {};".format(slotname), connstr=rep ) - assert res.rc == 0, "READ_REPLICATION_SLOT success with existing slot" + assert res.exit_code == 0, "READ_REPLICATION_SLOT success with existing slot" assert re.search( r"^physical\|[^|]*\|1$", res.stdout.strip(), re.M ), "READ_REPLICATION_SLOT returns tuple with slot information" @@ -190,7 +190,7 @@ def _slot_xmins(primary, standby1, standby2): assert ( primary.psql_capture( "SELECT pg_create_physical_replication_slot('standby_1');" - ).rc + ).exit_code == 0 ), "physical slot created on primary" standby1.append_conf("primary_slot_name = standby_1") @@ -200,7 +200,7 @@ def _slot_xmins(primary, standby1, standby2): assert ( standby1.psql_capture( "SELECT pg_create_physical_replication_slot('standby_2');" - ).rc + ).exit_code == 0 ), "physical slot created on intermediate replica" standby2.append_conf("primary_slot_name = standby_2") @@ -312,7 +312,7 @@ def _physical_slot_advance(primary, 
standby1, standby2): "SELECT pg_replication_slot_advance('{}', '{}'::pg_lsn);".format( phys_slot, current_lsn ) - ).rc + ).exit_code == 0 ), "slot advancing with physical slot" pre = primary.safe_psql( diff --git a/src/test/recovery/pyt/test_003_recovery_targets.py b/src/test/recovery/pyt/test_003_recovery_targets.py index 1b485fd87bf6b..0b66085112796 100644 --- a/src/test/recovery/pyt/test_003_recovery_targets.py +++ b/src/test/recovery/pyt/test_003_recovery_targets.py @@ -174,7 +174,7 @@ def _test_conflicting_targets(pg_bin, create_pg, primary, m): result = pg_bin.result( ["pg_ctl", "--pgdata", standby.datadir, "--log", standby.log, "start"] ) - assert result.rc != 0, "invalid recovery startup fails" + assert result.exit_code != 0, "invalid recovery startup fails" assert re.search( r"multiple recovery targets specified", pypg.slurp_file(standby.log) ), "multiple conflicting settings" diff --git a/src/test/recovery/pyt/test_006_logical_decoding.py b/src/test/recovery/pyt/test_006_logical_decoding.py index 3ca2e3f25614f..72ef2fad91ac9 100644 --- a/src/test/recovery/pyt/test_006_logical_decoding.py +++ b/src/test/recovery/pyt/test_006_logical_decoding.py @@ -120,7 +120,7 @@ def _otherdb_phase(node): "SELECT lsn FROM pg_logical_slot_peek_changes('test_slot', NULL, NULL) " "ORDER BY lsn DESC LIMIT 1;", dbname="otherdb", - ).rc + ).exit_code == 3 ), "replaying logical slot from another database fails" node.safe_psql( @@ -149,7 +149,7 @@ def _otherdb_phase(node): dbname="otherdb", ), "slot never became active" assert ( - node.psql_capture("DROP DATABASE otherdb").rc == 3 + node.psql_capture("DROP DATABASE otherdb").exit_code == 3 ), "dropping a DB with active logical slots fails" finally: recv.terminate() @@ -163,7 +163,7 @@ def _otherdb_phase(node): dbname="otherdb", ), "slot never became inactive" assert ( - node.psql_capture("DROP DATABASE otherdb").rc == 0 + node.psql_capture("DROP DATABASE otherdb").exit_code == 0 ), "dropping a DB with inactive logical slots 
succeeds" assert ( node.slot("otherdb_slot")["plugin"] == "" @@ -187,7 +187,7 @@ def _advance_and_stats_phase(node): "SELECT pg_replication_slot_advance('{}', '{}'::pg_lsn);".format( logical_slot, current_lsn ) - ).rc + ).exit_code == 0 ), "slot advancing with logical slot" pre = node.safe_psql( diff --git a/src/test/recovery/pyt/test_009_twophase.py b/src/test/recovery/pyt/test_009_twophase.py index bb715f344d2d0..b85ddb1557968 100644 --- a/src/test/recovery/pyt/test_009_twophase.py +++ b/src/test/recovery/pyt/test_009_twophase.py @@ -36,7 +36,7 @@ def _configure_and_reload(node, parameter): def _issue(node, body): """Run a multi-statement block with on_error_stop off; return rc.""" - return node.psql_capture(body, on_error_stop=False).rc + return node.psql_capture(body, on_error_stop=False).exit_code def test_009_twophase(create_pg): diff --git a/src/test/recovery/pyt/test_010_logical_decoding_timelines.py b/src/test/recovery/pyt/test_010_logical_decoding_timelines.py index 4945ffb0cdc26..f1dec91b7063c 100644 --- a/src/test/recovery/pyt/test_010_logical_decoding_timelines.py +++ b/src/test/recovery/pyt/test_010_logical_decoding_timelines.py @@ -66,7 +66,7 @@ def test_010_logical_decoding_timelines(create_pg): replica.append_conf("primary_slot_name = 'phys_slot'") replica.start() assert ( - primary.psql_capture("DROP DATABASE dropme").rc == 0 + primary.psql_capture("DROP DATABASE dropme").exit_code == 0 ), "dropped DB with logical slot OK on primary" primary.wait_for_catchup(replica) assert ( @@ -108,7 +108,7 @@ def test_010_logical_decoding_timelines(create_pg): "SELECT data FROM pg_logical_slot_peek_changes('after_basebackup', NULL, " "NULL, 'include-xids', '0', 'skip-empty-xacts', '1');" ) - assert res.rc == 3, "replaying from after_basebackup slot fails" + assert res.exit_code == 3, "replaying from after_basebackup slot fails" assert re.search( r'replication slot "after_basebackup" does not exist', res.stderr ), "after_basebackup slot missing" @@ -117,7 +117,7 
@@ def test_010_logical_decoding_timelines(create_pg): "NULL, 'include-xids', '0', 'skip-empty-xacts', '1');", timeout=pypg.test_timeout_default(), ) - assert res.rc == 0, "replay from slot before_basebackup succeeds" + assert res.exit_code == 0, "replay from slot before_basebackup succeeds" assert res.stdout == _EXPECTED, "decoded expected data from slot before_basebackup" assert res.stderr == "", "replay from slot before_basebackup produces no stderr" endpos = replica.safe_psql( diff --git a/src/test/recovery/pyt/test_012_subtransactions.py b/src/test/recovery/pyt/test_012_subtransactions.py index de47c50d695aa..99b292c71766e 100644 --- a/src/test/recovery/pyt/test_012_subtransactions.py +++ b/src/test/recovery/pyt/test_012_subtransactions.py @@ -85,7 +85,7 @@ def test_012_subtransactions(create_pg): primary, standby = standby, primary standby.enable_streaming(primary) standby.start() - assert primary.psql_capture("COMMIT PREPARED 'xact_012_1'").rc == 0, ( + assert primary.psql_capture("COMMIT PREPARED 'xact_012_1'").exit_code == 0, ( "Restore of PGPROC_MAX_CACHED_SUBXIDS+ prepared transaction on promoted " "standby" ) @@ -102,7 +102,7 @@ def test_012_subtransactions(create_pg): primary, standby = standby, primary standby.enable_streaming(primary) standby.start() - assert primary.psql_capture("ROLLBACK PREPARED 'xact_012_1'").rc == 0, ( + assert primary.psql_capture("ROLLBACK PREPARED 'xact_012_1'").exit_code == 0, ( "Rollback of PGPROC_MAX_CACHED_SUBXIDS+ prepared transaction on promoted " "standby" ) diff --git a/src/test/recovery/pyt/test_018_wal_optimize.py b/src/test/recovery/pyt/test_018_wal_optimize.py index c7b2135df7824..dec891ed1cee2 100644 --- a/src/test/recovery/pyt/test_018_wal_optimize.py +++ b/src/test/recovery/pyt/test_018_wal_optimize.py @@ -203,7 +203,7 @@ def _trigger_battery(node, wl, copy_file): node.stop("immediate") node.start() res = node.psql_capture("INSERT INTO idx_hint VALUES (2);") - assert res.rc == 3, "wal_level = {}, unique index 
LP_DEAD".format(wl) + assert res.exit_code == 3, "wal_level = {}, unique index LP_DEAD".format(wl) assert re.search( r"violates unique", res.stderr ), "wal_level = {}, unique index LP_DEAD message".format(wl) diff --git a/src/test/recovery/pyt/test_020_archive_status.py b/src/test/recovery/pyt/test_020_archive_status.py index b95441f87f8ed..b8f4d43007150 100644 --- a/src/test/recovery/pyt/test_020_archive_status.py +++ b/src/test/recovery/pyt/test_020_archive_status.py @@ -179,7 +179,7 @@ def _test_backup_mode(primary): "SELECT pg_backup_stop();" "SELECT pg_backup_start(repeat('x', 1026))" ) - assert result.rc == 3, "psql fails correctly" + assert result.exit_code == 3, "psql fails correctly" assert re.search( r"backup label too long", result.stderr ), "pg_backup_start fails gracefully" diff --git a/src/test/recovery/pyt/test_027_stream_regress.py b/src/test/recovery/pyt/test_027_stream_regress.py index 33eaf8e78334d..cc42b54866e41 100644 --- a/src/test/recovery/pyt/test_027_stream_regress.py +++ b/src/test/recovery/pyt/test_027_stream_regress.py @@ -30,7 +30,7 @@ def test_027_stream_regress(create_pg, pg_bin): assert ( primary.psql_capture( "SELECT pg_create_physical_replication_slot('standby_1');" - ).rc + ).exit_code == 0 ), "physical slot created on primary" backup_name = "my_backup" diff --git a/src/test/recovery/pyt/test_037_invalid_database.py b/src/test/recovery/pyt/test_037_invalid_database.py index 02571e9b08085..687e9ee667616 100644 --- a/src/test/recovery/pyt/test_037_invalid_database.py +++ b/src/test/recovery/pyt/test_037_invalid_database.py @@ -25,18 +25,22 @@ def _mark_invalid_checks(node): ) result = node.psql_capture("", dbname="regression_invalid") - assert result.rc == 2, "can't connect to invalid database - error code" + assert result.exit_code == 2, "can't connect to invalid database - error code" assert re.search( r'FATAL:\s+cannot connect to invalid database "regression_invalid"', result.stderr, ), "can't connect to invalid database - error 
message" assert ( - node.psql_capture("ALTER DATABASE regression_invalid CONNECTION LIMIT 10").rc + node.psql_capture( + "ALTER DATABASE regression_invalid CONNECTION LIMIT 10" + ).exit_code == 2 ), "can't ALTER invalid database" assert ( - node.psql_capture("CREATE DATABASE copy_invalid TEMPLATE regression_invalid").rc + node.psql_capture( + "CREATE DATABASE copy_invalid TEMPLATE regression_invalid" + ).exit_code == 3 ), "can't use invalid database as template" @@ -53,10 +57,10 @@ def _mark_invalid_checks(node): ), "invalid databases are ignored by vac_truncate_clog" assert ( - node.psql_capture("DROP DATABASE regression_invalid").rc == 0 + node.psql_capture("DROP DATABASE regression_invalid").exit_code == 0 ), "can DROP invalid database" assert ( - node.psql_capture("DROP DATABASE regression_invalid").rc == 3 + node.psql_capture("DROP DATABASE regression_invalid").exit_code == 3 ), "can't drop already dropped database" @@ -96,7 +100,7 @@ def test_invalid_database(create_pg): bgpsql.clear() assert ( - node.psql_capture("", dbname="regression_invalid_interrupt").rc == 2 + node.psql_capture("", dbname="regression_invalid_interrupt").exit_code == 2 ), "can't connect to invalid_interrupt database" # Release the lock and drop the database for real. 
diff --git a/src/test/recovery/pyt/test_041_checkpoint_at_promote.py b/src/test/recovery/pyt/test_041_checkpoint_at_promote.py index ceb711ba8f9ec..3d5fbaa9ae7ae 100644 --- a/src/test/recovery/pyt/test_041_checkpoint_at_promote.py +++ b/src/test/recovery/pyt/test_041_checkpoint_at_promote.py @@ -63,5 +63,5 @@ def test_041_checkpoint_at_promote(create_pg): killme.quit() assert standby.poll_query_until("", ""), "server back up after crash recovery" res = standby.psql_capture("select 1") - assert res.rc == 0, "psql connect success" + assert res.exit_code == 0, "psql connect success" assert res.stdout == "1", "psql select 1" diff --git a/src/test/recovery/pyt/test_053_standby_login_event_trigger.py b/src/test/recovery/pyt/test_053_standby_login_event_trigger.py index d8ea90e70678a..bc6343aeab619 100644 --- a/src/test/recovery/pyt/test_053_standby_login_event_trigger.py +++ b/src/test/recovery/pyt/test_053_standby_login_event_trigger.py @@ -56,7 +56,7 @@ def test_standby_login_event_trigger(create_pg): # A login to that DB on the standby must not try to clear the flag (which # would need AccessExclusiveLock, forbidden during recovery). 
result = standby.psql_capture("SELECT 1", dbname="regress_login_evt") - assert result.rc == 0, "standby accepts connection to DB with dangling flag" + assert result.exit_code == 0, "standby accepts connection to DB with dangling flag" assert not re.search( r"cannot acquire lock mode AccessExclusiveLock", result.stderr ), "no AccessExclusiveLock FATAL on standby login" diff --git a/src/test/ssl/pyt/test_002_scram.py b/src/test/ssl/pyt/test_002_scram.py index 3829aeafca10d..19ee5baa59c65 100644 --- a/src/test/ssl/pyt/test_002_scram.py +++ b/src/test/ssl/pyt/test_002_scram.py @@ -38,7 +38,7 @@ def test_002_scram(create_pg, tmp_path): supports_rsapss_certs = False node = create_pg("primary", hostaddr=SERVERHOSTADDR, start=True) - md5_works = node.psql_capture("select md5('')").rc == 0 + md5_works = node.psql_capture("select md5('')").exit_code == 0 ssl_server.configure_test_server_for_ssl( node, diff --git a/src/test/ssl/pyt/test_003_sslinfo.py b/src/test/ssl/pyt/test_003_sslinfo.py index 44ef8bdab604b..379061da58ee0 100644 --- a/src/test/ssl/pyt/test_003_sslinfo.py +++ b/src/test/ssl/pyt/test_003_sslinfo.py @@ -106,7 +106,7 @@ def _test_with_cert(node, common_connstr): result = node.psql_capture( "SELECT ssl_client_dn_field('invalid');", connstr=common_connstr ) - assert result.rc == 3, "ssl_client_dn_field() for an invalid field" + assert result.exit_code == 3, "ssl_client_dn_field() for an invalid field" assert ( node.safe_psql( diff --git a/src/test/subscription/pyt/test_027_nosuperuser.py b/src/test/subscription/pyt/test_027_nosuperuser.py index d1785cbf1eeb0..873f72ba4cfb4 100644 --- a/src/test/subscription/pyt/test_027_nosuperuser.py +++ b/src/test/subscription/pyt/test_027_nosuperuser.py @@ -190,7 +190,7 @@ def _password_required(create_pg): "SET SESSION AUTHORIZATION regress_test_user;\n" "ALTER SUBSCRIPTION regress_test_sub REFRESH PUBLICATION;" ) - assert result.rc != 0, "non-superuser owner without password fails" + assert result.exit_code != 0, 
"non-superuser owner without password fails" assert re.search( r"DETAIL: Non-superusers must provide a password in the connection " r"string\.", @@ -207,7 +207,7 @@ def _password_required(create_pg): "ALTER SUBSCRIPTION regress_test_sub CONNECTION '{}';\n" "ALTER SUBSCRIPTION regress_test_sub REFRESH PUBLICATION;".format(connstr2) ) - assert result.rc == 0, "refresh succeeds once the password is supplied" + assert result.exit_code == 0, "refresh succeeds once the password is supplied" def test_nosuperuser(create_pg): diff --git a/src/test/subscription/pyt/test_100_bugs.py b/src/test/subscription/pyt/test_100_bugs.py index 60842f69cff88..9e01899f806c4 100644 --- a/src/test/subscription/pyt/test_100_bugs.py +++ b/src/test/subscription/pyt/test_100_bugs.py @@ -54,13 +54,13 @@ def _test_temp_unlogged_for_all_tables(publisher, subscriber): assert ( publisher.psql_capture( "CREATE TEMPORARY TABLE tt1 AS SELECT 1 AS a; UPDATE tt1 SET a = 2;" - ).rc + ).exit_code == 0 ), "update to temporary table without replica identity" assert ( publisher.psql_capture( "CREATE UNLOGGED TABLE tu1 AS SELECT 1 AS a; UPDATE tu1 SET a = 2;" - ).rc + ).exit_code == 0 ), "update to unlogged table without replica identity" publisher.safe_psql("DROP PUBLICATION pub") @@ -322,7 +322,7 @@ def _test_replication_slot_commands(publisher, subscriber): extra_params=["-d", connstr_db], timeout=pypg.test_timeout_default(), ) - assert result.rc == 0, "create and immediate drop of replication slot" + assert result.exit_code == 0, "create and immediate drop of replication slot" publisher.stop("fast") subscriber.stop("fast") @@ -401,7 +401,7 @@ def _test_drop_subscription_deadlock(publisher): "PUBLICATION regress_pub WITH (connect=false);".format(connstr) ) result = publisher.psql_capture("DROP SUBSCRIPTION regress_sub1") - assert result.rc != 0, "replication slot does not exist: exit code not 0" + assert result.exit_code != 0, "replication slot does not exist: exit code not 0" assert ( 'ERROR: could not 
drop replication slot "regress_sub1" on publisher' in result.stderr From baf5659d3c23843a301cc0dcd4f578227fc3838d Mon Sep 17 00:00:00 2001 From: Greg Burd Date: Sun, 14 Jun 2026 09:45:06 -0400 Subject: [PATCH 27/36] pytest: assert server failures with pytest.raises, not fail_ok booleans Convert the pure status-flag uses of restart(fail_ok=True)/start(fail_ok=True) to the Pythonic form: a restart expected to fail is `with pytest.raises( PgServerError): node.restart()`, and one expected to succeed is just node.restart() (which raises if it cannot). The fail_ok cases that pair with log_like/log_unlike pattern checks, and the one genuine boolean conditional in test_017_shm, keep fail_ok -- there it is an API for "tolerate and inspect," not a swallowed status. --- .../oauth_validator/pyt/test_001_server.py | 6 ++-- .../recovery/pyt/test_033_replay_tsp_drops.py | 2 +- src/test/ssl/pyt/test_001_ssltests.py | 15 ++++---- src/test/ssl/pyt/test_004_sni.py | 35 +++++++++---------- 4 files changed, 28 insertions(+), 30 deletions(-) diff --git a/src/test/modules/oauth_validator/pyt/test_001_server.py b/src/test/modules/oauth_validator/pyt/test_001_server.py index a62aec1607edb..3943ef690f11d 100644 --- a/src/test/modules/oauth_validator/pyt/test_001_server.py +++ b/src/test/modules/oauth_validator/pyt/test_001_server.py @@ -756,9 +756,9 @@ def _phase_validator_hba_options(node, common, issuer, offset): def _phase_multiple_validators(node, issuer, offset): """With multiple validators each HBA line must name one explicitly.""" node.append_conf("oauth_validator_libraries = 'validator, fail_validator'\n") - assert ( - node.restart(fail_ok=True) is False - ), "restart fails without explicit validators in oauth HBA entries" + with pytest.raises(pypg.PgServerError): + # restart fails without explicit validators in oauth HBA entries + node.restart() offset = node.wait_for_log( r'authentication method "oauth" requires option "validator" to be set', offset ) diff --git 
a/src/test/recovery/pyt/test_033_replay_tsp_drops.py b/src/test/recovery/pyt/test_033_replay_tsp_drops.py index 76c9b3f8a9109..6e5a5d64eb43b 100644 --- a/src/test/recovery/pyt/test_033_replay_tsp_drops.py +++ b/src/test/recovery/pyt/test_033_replay_tsp_drops.py @@ -56,7 +56,7 @@ def _test_tablespace(create_pg, strategy): standby.safe_psql("ALTER SYSTEM SET log_min_messages TO debug1;") standby.stop("immediate") - assert standby.start(fail_ok=True) is True, "standby node started for " + strategy + standby.start() # standby node must start for this strategy standby.stop("immediate") diff --git a/src/test/ssl/pyt/test_001_ssltests.py b/src/test/ssl/pyt/test_001_ssltests.py index ccb97ee68748b..90459be0c1c0b 100644 --- a/src/test/ssl/pyt/test_001_ssltests.py +++ b/src/test/ssl/pyt/test_001_ssltests.py @@ -163,19 +163,20 @@ def _test_protocol_and_groups(node, ssl_server): node.append_conf( "ssl_min_protocol_version='TLSv1.2'\nssl_max_protocol_version='TLSv1.1'" ) - assert ( - node.restart(fail_ok=True) is False - ), "restart fails with incorrect SSL protocol bounds" + with pytest.raises(pypg.PgServerError): + # restart fails with incorrect SSL protocol bounds + node.restart() node.append_conf("ssl_min_protocol_version='TLSv1.2'\nssl_max_protocol_version=''") - assert ( - node.restart(fail_ok=True) is True - ), "restart succeeds with correct SSL protocol bounds" + # restart succeeds with correct SSL protocol bounds + node.restart() # Colon-separated groups: a bad value fails to start. The value is reset # later by switch_server_cert (which rewrites sslconfig.conf from scratch). 
node.append_conf("ssl_groups='bad:value'", "sslconfig.conf") - assert node.restart(fail_ok=True) is False, "restart fails with incorrect groups" + with pytest.raises(pypg.PgServerError): + # restart fails with incorrect groups + node.restart() assert not node.log_matches(r"no SSL error reported"), "error message translated" node.append_conf("ssl_groups='prime256v1'", "ssl_config.conf") node.restart(fail_ok=True) diff --git a/src/test/ssl/pyt/test_004_sni.py b/src/test/ssl/pyt/test_004_sni.py index e9d160dd8ea6f..ec33e615b6c05 100644 --- a/src/test/ssl/pyt/test_004_sni.py +++ b/src/test/ssl/pyt/test_004_sni.py @@ -220,7 +220,9 @@ def _test_invalid_pg_hosts(node): for conf, msg in cases: os.unlink(_data_dir(node) / "pg_hosts.conf") node.append_conf(conf, "pg_hosts.conf") - assert node.restart(fail_ok=True) is False, msg + with pytest.raises(pypg.PgServerError): + node.restart() + assert node.is_alive() is False, msg def _test_passphrase_reload(node, connstr, exec_backend): @@ -261,10 +263,9 @@ def _test_passphrase_reload(node, connstr, exec_backend): '"echo wrongpassword" on', "pg_hosts.conf", ) - assert node.restart(fail_ok=True) is False, ( - "pg_hosts.conf: restart fails with password-protected key when using " - "the wrong passphrase command" - ) + with pytest.raises(pypg.PgServerError): + # restart must fail with password-protected key and the wrong passphrase + node.restart() # Correct passphrase command: server must start. 
os.unlink(_data_dir(node) / "pg_hosts.conf") @@ -273,10 +274,8 @@ def _test_passphrase_reload(node, connstr, exec_backend): '"echo secret1" on', "pg_hosts.conf", ) - assert node.restart(fail_ok=True) is True, ( - "pg_hosts.conf: restart succeeds with password-protected key when using " - "the correct passphrase command" - ) + # restart succeeds with password-protected key and the correct passphrase + node.restart() localhost_connstr = ( "{} sslrootcert=ssl/root+server_ca.crt sslmode=require host=localhost".format( @@ -312,10 +311,8 @@ def _test_passphrase_no_reload(node, localhost_connstr, exec_backend): "pg_hosts.conf", ) node_loglocation = node.current_log_position() - assert node.restart(fail_ok=True) is True, ( - "pg_hosts.conf: restart succeeds with password-protected key when using " - "the correct passphrase command" - ) + # restart succeeds with password-protected key and the correct passphrase + node.restart() log = pypg.slurp_file(node.log, node_loglocation) assert ( "cannot be reloaded because it requires a passphrase" not in log @@ -442,9 +439,9 @@ def _test_client_cas_eol(node): '"cmd" on TRAILING_TEXT MORE_TEXT', "pg_hosts.conf", ) - assert ( - node.restart(fail_ok=True) is False - ), "pg_hosts.conf: restart fails with extra data at EOL" + with pytest.raises(pypg.PgServerError): + # restart fails with extra data at EOL + node.restart() os.unlink(_data_dir(node) / "pg_hosts.conf") node.append_conf( @@ -452,6 +449,6 @@ def _test_client_cas_eol(node): '"cmd" notabooleanvalue', "pg_hosts.conf", ) - assert ( - node.restart(fail_ok=True) is False - ), "pg_hosts.conf: restart fails with non-boolean value in boolean field" + with pytest.raises(pypg.PgServerError): + # restart fails with non-boolean value in boolean field + node.restart() From de6ff14b3e10362f07b5cf4020114fde4559dd6b Mon Sep 17 00:00:00 2001 From: Greg Burd Date: Sun, 14 Jun 2026 09:47:23 -0400 Subject: [PATCH 28/36] pytest: make psql sessions context managers Add __enter__/__exit__ to 
BackgroundPsql and InteractivePsql so a session can be scoped with `with node.background_psql(...) as bg:` and is always quit() on block exit, even on an exception -- matching how the libpq PGconn already works and removing the manual-finish() leak risk. Adds a self-test for the background-psql context manager. --- src/test/pytest/pypg/bgpsql.py | 6 ++++++ src/test/pytest/pypg/interactive.py | 6 ++++++ src/test/pytest/pyt/test_node_sql.py | 9 +++++++++ 3 files changed, 21 insertions(+) diff --git a/src/test/pytest/pypg/bgpsql.py b/src/test/pytest/pypg/bgpsql.py index 31855f00122bb..bb1a17ba43cf6 100644 --- a/src/test/pytest/pypg/bgpsql.py +++ b/src/test/pytest/pypg/bgpsql.py @@ -231,6 +231,12 @@ def quit(self): finish = quit + def __enter__(self) -> "BackgroundPsql": + return self + + def __exit__(self, *exc) -> None: + self.quit() + def restart(self): """Quit (if needed) and start a fresh psql session with the same params.""" self.quit() diff --git a/src/test/pytest/pypg/interactive.py b/src/test/pytest/pypg/interactive.py index 5c3f5205cdd7c..b86ef31d73d11 100644 --- a/src/test/pytest/pypg/interactive.py +++ b/src/test/pytest/pypg/interactive.py @@ -110,3 +110,9 @@ def quit(self): self._proc.wait() os.close(self._master) return self._proc.returncode + + def __enter__(self) -> "InteractivePsql": + return self + + def __exit__(self, *exc) -> None: + self.quit() diff --git a/src/test/pytest/pyt/test_node_sql.py b/src/test/pytest/pyt/test_node_sql.py index 7504ff29a79a7..cdbeb92aa092e 100644 --- a/src/test/pytest/pyt/test_node_sql.py +++ b/src/test/pytest/pyt/test_node_sql.py @@ -71,3 +71,12 @@ def test_scalar_rejects_multi_row(pg): result = pg.sql("SELECT * FROM (VALUES (1), (2)) v") with pytest.raises(ValueError): result.scalar() + + +def test_background_psql_context_manager(pg): + """A background psql session can be used as a context manager.""" + with pg.background_psql("postgres") as bg: + assert bg.query("SELECT 1").strip() == "1" + # Leaving the block quits 
the session; a second one starts cleanly. + with pg.background_psql("postgres") as bg: + assert bg.query("SELECT 2").strip() == "2" From c236b26849641158934fc745d580d71c57b1a868 Mon Sep 17 00:00:00 2001 From: Greg Burd Date: Sun, 14 Jun 2026 09:48:58 -0400 Subject: [PATCH 29/36] pytest: make Backup a frozen dataclass Replace the Backup namedtuple (the HBA + postgresql.conf snapshot a reloading()/restarting() scope restores) with a frozen dataclass carrying field types and a docstring, and drop the now-unused namedtuple import. --- src/test/pytest/pypg/server.py | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/src/test/pytest/pypg/server.py b/src/test/pytest/pypg/server.py index 44bcd69bc1709..da17196815e68 100644 --- a/src/test/pytest/pypg/server.py +++ b/src/test/pytest/pypg/server.py @@ -12,7 +12,7 @@ import subprocess import tempfile import time -from collections import namedtuple +from dataclasses import dataclass from typing import Callable, Dict, Optional, Tuple from ._env import test_timeout_default @@ -122,7 +122,14 @@ def set(self, **gucs): print(n, "=", v, file=f) -Backup = namedtuple("Backup", "conf, hba") +@dataclass(frozen=True) +class Backup: + """The HBA and postgresql.conf backups taken by a reloading()/restarting() + scope, restored when the scope exits.""" + + conf: "Config" + hba: "HBA" + WINDOWS_OS = platform.system() == "Windows" From a4995c819aa4ab641156158dabfc966352c03ac3 Mon Sep 17 00:00:00 2001 From: Greg Burd Date: Thu, 11 Jun 2026 07:58:12 -0400 Subject: [PATCH 30/36] ci: run only the pytest suite, disabling the Perl TAP tests Set -Dtap_tests=disabled across the Linux (64-bit ASan, 32-bit, io_uring/LLVM), macOS, and Windows meson jobs so CI exercises the Python pytest suite alone. The Perl TAP tests and PostgreSQL::Test helper modules remain in the tree and runnable locally for cross-validation; only their CI execution is turned off. This is fully reversible by flipping the flag back. 
Co-authored-by: Andrew Dunstan Co-authored-by: Greg Burd --- .github/workflows/pg-ci.yml | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/.github/workflows/pg-ci.yml b/.github/workflows/pg-ci.yml index 0b32bcbabff5b..43e3093c96002 100644 --- a/.github/workflows/pg-ci.yml +++ b/.github/workflows/pg-ci.yml @@ -90,7 +90,7 @@ env: -Dpltcl=enabled -Dreadline=enabled -Dssl=openssl - -Dtap_tests=enabled + -Dtap_tests=disabled -Dzlib=enabled -Dzstd=enabled @@ -346,7 +346,7 @@ jobs: --buildtype=debug \ --auto-features=disabled \ -Ddefault_library=shared \ - -Dtap_tests=enabled \ + -Dtap_tests=disabled \ build - name: Build @@ -569,6 +569,7 @@ jobs: --pkg-config-path /usr/lib/i386-linux-gnu/pkgconfig/ \ -DPERL=perl5.40-i386-linux-gnu \ -Dlibnuma=disabled \ + -Dtap_tests=disabled \ build - name: Build @@ -657,6 +658,7 @@ jobs: -Duuid=e2fs \ --buildtype=debug \ -Dllvm=enabled \ + -Dtap_tests=disabled \ build - name: Build @@ -879,7 +881,7 @@ jobs: -Dplperl=enabled -Dplpython=enabled -Dssl=openssl - -Dtap_tests=enabled + -Dtap_tests=disabled defaults: run: From a46834c558b4a6205d1ccb2711286af805aa9561 Mon Sep 17 00:00:00 2001 From: Greg Burd Date: Mon, 15 Jun 2026 07:51:02 -0400 Subject: [PATCH 31/36] pytest: add a general-purpose wait_until() poll helper poll_query_until covers "run this query until its text output equals X" (the faithful analog of Perl's Cluster->poll_query_until, used verbatim by the ports). wait_until() covers the cases it cannot: polling an arbitrary condition with a try/except inside, e.g. waiting for a crashed server to accept connections again while swallowing PgSqlError. It yields once per attempt until the caller breaks, or raises TimeoutError after the timeout. Matches the wait_until in Jelte Fennema-Nio's pytap branch so the two efforts converge. 
Co-authored-by: Jelte Fennema-Nio Co-authored-by: Greg Burd --- src/test/pytest/pypg/util.py | 38 ++++++++++++++++++++++++++++++++++++ 1 file changed, 38 insertions(+) diff --git a/src/test/pytest/pypg/util.py b/src/test/pytest/pypg/util.py index f1a3d5345f7e1..b933eb2371550 100644 --- a/src/test/pytest/pypg/util.py +++ b/src/test/pytest/pypg/util.py @@ -272,3 +272,41 @@ def wait_for_file(filename, regexp, offset=0): raise TimeoutError( "timed out waiting for file {} contents to match: {}".format(filename, regexp) ) + + +def wait_until(error_message="condition not met", timeout=None, interval=0.1): + """Poll for a condition, yielding once per attempt until it holds. + + A general-purpose alternative to poll_query_until for conditions that are + not a single query comparing equal to a fixed string -- for example, + waiting for a crashed server to accept connections again while swallowing + the connection errors:: + + for _ in wait_until("server did not come back after crash", timeout=180): + try: + node.sql("SELECT 1") + break + except PgSqlError: + pass + + The loop runs until ``break`` (success) or *timeout* seconds elapse, at + which point a TimeoutError carrying *error_message* is raised. *timeout* + defaults to PG_TEST_TIMEOUT_DEFAULT. Progress is logged every 5s for long + waits. 
+ """ + import time as _time # pylint: disable=import-outside-toplevel + from ._env import test_timeout_default # pylint: disable=import-outside-toplevel + + if timeout is None: + timeout = test_timeout_default() + start = _time.monotonic() + end = start + timeout + last_progress = start + while _time.monotonic() < end: + now = _time.monotonic() + if timeout > 5 and now - last_progress > 5: + last_progress = now + print("{} after {:.0f}s - will retry".format(error_message, now - start)) + yield + _time.sleep(interval) + raise TimeoutError(error_message + " in time") From 085882dfb3bcea4f2fe971e7c01fc3eccecfbeb0 Mon Sep 17 00:00:00 2001 From: Greg Burd Date: Mon, 15 Jun 2026 07:51:02 -0400 Subject: [PATCH 32/36] pytest: build out the libpq binding layer Modularize the in-process libpq binding into focused modules along the boundaries Andrew Dunstan uses, keeping our API: constants.py (enums + ctypes structs), findlib.py (locate/load libpq + the ABI-mismatch skip), oids.py (type-OID conversion registry), result.py (PGresult + ResultData), and session.py (PGconn + connect/connstr). PGconn/PGresult/connect/connstr and the exception classes still import from `libpq`. Add named per-SQLSTATE exception subclasses (QueryCanceled, UniqueViolation, DeadlockDetected, ...) with a for_sqlstate() dispatch, so a test can write `with pytest.raises(QueryCanceled):` instead of catching the generic error and checking .sqlstate; every subclass is still catchable as LibpqError / pypg.PgSqlError. Reviving the per-error-code idea from an earlier revision of Jelte Fennema-Nio's patchset. Adopt Andrew Dunstan's ResultData: result.py carries the column names and type OIDs alongside the converted rows, and PGconn.query() returns it (no shape simplification) for tests asserting on result structure or column typing. 
Add LISTEN/NOTIFY support via the PGnotify struct and PGconn.get_notification()/get_all_notifications(), so the libpq channel can consume async notifications the way the psql channel reads psql's text echo. Co-authored-by: Andrew Dunstan Co-authored-by: Jelte Fennema-Nio Co-authored-by: Greg Burd --- src/test/pytest/libpq/__init__.py | 57 ++- src/test/pytest/libpq/_core.py | 547 ---------------------- src/test/pytest/libpq/constants.py | 72 +++ src/test/pytest/libpq/errors.py | 99 ++++ src/test/pytest/libpq/findlib.py | 142 ++++++ src/test/pytest/libpq/oids.py | 141 ++++++ src/test/pytest/libpq/pgnotify.py | 47 ++ src/test/pytest/libpq/result.py | 147 ++++++ src/test/pytest/libpq/session.py | 171 +++++++ src/test/pytest/pypg/__init__.py | 35 +- src/test/pytest/pypg/errors.py | 37 +- src/test/pytest/pyt/test_errors.py | 47 +- src/test/pytest/pyt/test_query_helpers.py | 34 ++ 13 files changed, 1013 insertions(+), 563 deletions(-) delete mode 100644 src/test/pytest/libpq/_core.py create mode 100644 src/test/pytest/libpq/constants.py create mode 100644 src/test/pytest/libpq/findlib.py create mode 100644 src/test/pytest/libpq/oids.py create mode 100644 src/test/pytest/libpq/pgnotify.py create mode 100644 src/test/pytest/libpq/result.py create mode 100644 src/test/pytest/libpq/session.py diff --git a/src/test/pytest/libpq/__init__.py b/src/test/pytest/libpq/__init__.py index 94554fee91f1c..cffe89c8a19a1 100644 --- a/src/test/pytest/libpq/__init__.py +++ b/src/test/pytest/libpq/__init__.py @@ -7,28 +7,61 @@ """ from . 
import errors -from .errors import LibpqError -from ._core import ( - ConnectionStatus, - DiagField, - ExecStatus, - PGconn, - PGresult, - connect, - connstr, - load_libpq_handle, - libpq_abi_skip_reason, - register_type_info, +from .errors import ( + LibpqError, + for_sqlstate, + SyntaxErrorState, + UndefinedTable, + UndefinedColumn, + InsufficientPrivilege, + UniqueViolation, + ForeignKeyViolation, + NotNullViolation, + CheckViolation, + SerializationFailure, + DeadlockDetected, + QueryCanceled, + AdminShutdown, + CrashShutdown, + CannotConnectNow, + ReadOnlySqlTransaction, + ObjectInUse, ) +from .constants import ConnectionStatus, DiagField, ExecStatus +from .findlib import load_libpq_handle, libpq_abi_skip_reason +from .oids import register_type_info +from .pgnotify import PGnotify, read_notification +from .result import PGresult, ResultData +from .session import PGconn, connect, connstr __all__ = [ "errors", "LibpqError", + "for_sqlstate", + "SyntaxErrorState", + "UndefinedTable", + "UndefinedColumn", + "InsufficientPrivilege", + "UniqueViolation", + "ForeignKeyViolation", + "NotNullViolation", + "CheckViolation", + "SerializationFailure", + "DeadlockDetected", + "QueryCanceled", + "AdminShutdown", + "CrashShutdown", + "CannotConnectNow", + "ReadOnlySqlTransaction", + "ObjectInUse", "ConnectionStatus", "DiagField", "ExecStatus", "PGconn", "PGresult", + "ResultData", + "PGnotify", + "read_notification", "connect", "connstr", "load_libpq_handle", diff --git a/src/test/pytest/libpq/_core.py b/src/test/pytest/libpq/_core.py deleted file mode 100644 index b99661aecff17..0000000000000 --- a/src/test/pytest/libpq/_core.py +++ /dev/null @@ -1,547 +0,0 @@ -# Copyright (c) 2025, PostgreSQL Global Development Group - -""" -Core libpq functionality - ctypes bindings and connection handling. 
-""" - -import contextlib -import ctypes -import datetime -import decimal -import enum -import json -import platform -import os -import uuid -from typing import Any, Callable, Dict, NoReturn, Optional - -from .errors import LibpqError - - -# PG_DIAG field identifiers from postgres_ext.h -class DiagField(enum.IntEnum): - """PG_DIAG_* field identifiers used with PQresultErrorField().""" - - SEVERITY = ord("S") - SEVERITY_NONLOCALIZED = ord("V") - SQLSTATE = ord("C") - MESSAGE_PRIMARY = ord("M") - MESSAGE_DETAIL = ord("D") - MESSAGE_HINT = ord("H") - STATEMENT_POSITION = ord("P") - INTERNAL_POSITION = ord("p") - INTERNAL_QUERY = ord("q") - CONTEXT = ord("W") - SCHEMA_NAME = ord("s") - TABLE_NAME = ord("t") - COLUMN_NAME = ord("c") - DATATYPE_NAME = ord("d") - CONSTRAINT_NAME = ord("n") - SOURCE_FILE = ord("F") - SOURCE_LINE = ord("L") - SOURCE_FUNCTION = ord("R") - - -class ConnectionStatus(enum.IntEnum): - """PostgreSQL connection status codes from libpq.""" - - CONNECTION_OK = 0 - CONNECTION_BAD = 1 - - -class ExecStatus(enum.IntEnum): - """PostgreSQL result status codes from PQresultStatus.""" - - PGRES_EMPTY_QUERY = 0 - PGRES_COMMAND_OK = 1 - PGRES_TUPLES_OK = 2 - PGRES_COPY_OUT = 3 - PGRES_COPY_IN = 4 - PGRES_BAD_RESPONSE = 5 - PGRES_NONFATAL_ERROR = 6 - PGRES_FATAL_ERROR = 7 - PGRES_COPY_BOTH = 8 - PGRES_SINGLE_TUPLE = 9 - PGRES_PIPELINE_SYNC = 10 - PGRES_PIPELINE_ABORTED = 11 - - -class _PGconn(ctypes.Structure): - pass - - -class _PGresult(ctypes.Structure): - pass - - -_PGconn_p = ctypes.POINTER(_PGconn) -_PGresult_p = ctypes.POINTER(_PGresult) - - -def _libpq_path(libdir, bindir): - """Return the platform-specific full path to libpq for this build.""" - system = platform.system() - if system in ("Linux", "FreeBSD", "NetBSD", "OpenBSD"): - # On Windows, libpq.dll is confusingly in bindir, not libdir. 
- return os.path.join(libdir, "libpq.so.5") - if system == "Darwin": - return os.path.join(libdir, "libpq.5.dylib") - if system == "Windows": - return os.path.join(bindir, "libpq.dll") - raise AssertionError("the libpq fixture must be updated for {}".format(system)) - - -def _elf_class(path): - """Return 1 (ELFCLASS32), 2 (ELFCLASS64), or None if path is not ELF.""" - try: - with open(path, "rb") as fh: - ident = fh.read(5) - except OSError: - return None - if ident[:4] != b"\x7fELF": - return None - return ident[4] # e_ident[EI_CLASS]: 1 = 32-bit, 2 = 64-bit - - -def libpq_abi_skip_reason(libdir, bindir): - """Return a reason to skip if this Python cannot load the build's libpq. - - The framework loads libpq in-process via ctypes, so the interpreter and the - library must share an ABI. The common mismatch is a 64-bit Python against a - 32-bit libpq (meson's -m32 build), which otherwise fails every test with - OSError: wrong ELF class. Detect it by reading the library's ELF header - rather than dlopen()ing it -- a trial dlopen of an ASan-instrumented libpq - would abort the process, not raise. Returns None when the ABI matches, when - libpq cannot be located, or when the file is not ELF (macOS/Windows). - - Co-authored-by: Andrew Dunstan - """ - try: - path = _libpq_path(libdir, bindir) - except AssertionError: - return None - elf_class = _elf_class(path) - if elf_class is None: - return None - py_bits = ctypes.sizeof(ctypes.c_void_p) * 8 - lib_bits = 64 if elf_class == 2 else 32 - if py_bits != lib_bits: - return ( - "{py}-bit Python cannot load {lib}-bit libpq ({path}); the " - "in-process libpq framework needs a {lib}-bit interpreter".format( - py=py_bits, lib=lib_bits, path=path - ) - ) - return None - - -def load_libpq_handle(libdir, bindir): - """ - Loads a ctypes handle for libpq. Some common function prototypes are - initialized for general use. 
- """ - system = platform.system() - - if system in ("Linux", "FreeBSD", "NetBSD", "OpenBSD"): - name = "libpq.so.5" - elif system == "Darwin": - name = "libpq.5.dylib" - elif system == "Windows": - name = "libpq.dll" - else: - assert False, f"the libpq fixture must be updated for {system}" - - if system == "Windows": - # On Windows, libpq.dll is confusingly in bindir, not libdir. And we - # need to add this directory the the search path. - libpq_path = os.path.join(bindir, name) - lib = ctypes.CDLL(libpq_path) - else: - libpq_path = os.path.join(libdir, name) - lib = ctypes.CDLL(libpq_path) - - # - # Function Prototypes - # - - lib.PQconnectdb.restype = _PGconn_p - lib.PQconnectdb.argtypes = [ctypes.c_char_p] - - lib.PQstatus.restype = ctypes.c_int - lib.PQstatus.argtypes = [_PGconn_p] - - lib.PQexec.restype = _PGresult_p - lib.PQexec.argtypes = [_PGconn_p, ctypes.c_char_p] - - lib.PQresultStatus.restype = ctypes.c_int - lib.PQresultStatus.argtypes = [_PGresult_p] - - lib.PQclear.restype = None - lib.PQclear.argtypes = [_PGresult_p] - - lib.PQerrorMessage.restype = ctypes.c_char_p - lib.PQerrorMessage.argtypes = [_PGconn_p] - - lib.PQfinish.restype = None - lib.PQfinish.argtypes = [_PGconn_p] - - lib.PQresultErrorMessage.restype = ctypes.c_char_p - lib.PQresultErrorMessage.argtypes = [_PGresult_p] - - lib.PQntuples.restype = ctypes.c_int - lib.PQntuples.argtypes = [_PGresult_p] - - lib.PQnfields.restype = ctypes.c_int - lib.PQnfields.argtypes = [_PGresult_p] - - lib.PQgetvalue.restype = ctypes.c_char_p - lib.PQgetvalue.argtypes = [_PGresult_p, ctypes.c_int, ctypes.c_int] - - lib.PQgetisnull.restype = ctypes.c_int - lib.PQgetisnull.argtypes = [_PGresult_p, ctypes.c_int, ctypes.c_int] - - lib.PQftype.restype = ctypes.c_uint - lib.PQftype.argtypes = [_PGresult_p, ctypes.c_int] - - lib.PQresultErrorField.restype = ctypes.c_char_p - lib.PQresultErrorField.argtypes = [_PGresult_p, ctypes.c_int] - - return lib - - -# PostgreSQL type OIDs and conversion system -# Type 
registry - maps OID to converter function -_type_converters: Dict[int, Callable[[str], Any]] = {} -_array_to_elem_map: Dict[int, int] = {} - - -def register_type_info( - name: str, oid: int, array_oid: int, converter: Callable[[str], Any] -): - """ - Register a PostgreSQL type with its OID, array OID, and conversion function. - - Usage: - register_type_info("bool", 16, 1000, lambda v: v == "t") - """ - _type_converters[oid] = converter - if array_oid is not None: - _array_to_elem_map[array_oid] = oid - - -def _parse_array(value: str, elem_oid: int): - """Parse PostgreSQL array syntax into nested Python lists.""" - stack: list[list] = [] - current_element: list[str] = [] - in_quotes = False - was_quoted = False - pos = 0 - - while pos < len(value): - char = value[pos] - - if in_quotes: - if char == "\\": - next_char = value[pos + 1] - if next_char not in '"\\': - raise NotImplementedError('Only \\" and \\\\ escapes are supported') - current_element.append(next_char) - pos += 2 - continue - if char == '"': - in_quotes = False - else: - current_element.append(char) - elif char == '"': - in_quotes = True - was_quoted = True - elif char == "{": - stack.append([]) - elif char in ",}": - if current_element or was_quoted: - elem = "".join(current_element) - if not was_quoted and elem == "NULL": - stack[-1].append(None) - else: - stack[-1].append(_convert_pg_value(elem, elem_oid)) - current_element = [] - was_quoted = False - if char == "}": - completed = stack.pop() - if not stack: - return completed - stack[-1].append(completed) - elif char != " ": - current_element.append(char) - pos += 1 - - raise ValueError(f"Malformed array literal: {value}") - - -# Register standard PostgreSQL types that we'll likely encounter in tests -register_type_info("bool", 16, 1000, lambda v: v == "t") -register_type_info("int2", 21, 1005, int) -register_type_info("int4", 23, 1007, int) -register_type_info("int8", 20, 1016, int) -register_type_info("float4", 700, 1021, float) 
-register_type_info("float8", 701, 1022, float) -register_type_info("numeric", 1700, 1231, decimal.Decimal) -register_type_info("text", 25, 1009, str) -register_type_info("varchar", 1043, 1015, str) -register_type_info("date", 1082, 1182, datetime.date.fromisoformat) -register_type_info("time", 1083, 1183, datetime.time.fromisoformat) -register_type_info("timestamp", 1114, 1115, datetime.datetime.fromisoformat) -register_type_info("timestamptz", 1184, 1185, datetime.datetime.fromisoformat) -register_type_info("uuid", 2950, 2951, uuid.UUID) -register_type_info("json", 114, 199, json.loads) -register_type_info("jsonb", 3802, 3807, json.loads) - - -def _convert_pg_value(value: str, type_oid: int) -> Any: - """ - Convert PostgreSQL string value to appropriate Python type based on OID. - Uses the registered type converters from register_type_info(). - """ - # Check if it's an array type - if type_oid in _array_to_elem_map: - elem_oid = _array_to_elem_map[type_oid] - return _parse_array(value, elem_oid) - - # Use registered converter if available - converter = _type_converters.get(type_oid) - if converter: - return converter(value) - - # Unknown types - return as string - return value - - -def simplify_query_results(results) -> Any: - """ - Simplify the results of a query so that the caller doesn't have to unpack - lists and tuples of length 1. 
- """ - if len(results) == 1: - row = results[0] - if len(row) == 1: - # If there's only a single cell, just return the value - return row[0] - # If there's only a single row, just return that row - return row - - if len(results) != 0 and len(results[0]) == 1: - # If there's only a single column, return an array of values - return [row[0] for row in results] - - # if there are multiple rows and columns, return the results as is - return results - - -class PGresult(contextlib.AbstractContextManager): - """Wraps a raw _PGresult_p with a more friendly interface.""" - - def __init__(self, lib: ctypes.CDLL, res: _PGresult_p): # type: ignore[valid-type] - self._lib = lib - # Cleared to None on __exit__ once the result has been freed. - self._res: Optional[_PGresult_p] = res # type: ignore[valid-type] - - def __exit__(self, *exc): - self._lib.PQclear(self._res) - self._res = None - - def status(self) -> ExecStatus: - return ExecStatus(self._lib.PQresultStatus(self._res)) - - def error_message(self): - """Returns the error message associated with this result.""" - msg = self._lib.PQresultErrorMessage(self._res) - return msg.decode() if msg else "" - - def _get_error_field(self, field: DiagField) -> Optional[str]: - """Get an error field from the result using PQresultErrorField.""" - val = self._lib.PQresultErrorField(self._res, int(field)) - return val.decode() if val else None - - def raise_error(self) -> NoReturn: - """ - Raises LibpqError with diagnostic information from the result. 
- """ - if not self._res: - raise LibpqError("query failed: out of memory or connection lost") - - sqlstate = self._get_error_field(DiagField.SQLSTATE) - primary = self._get_error_field(DiagField.MESSAGE_PRIMARY) - detail = self._get_error_field(DiagField.MESSAGE_DETAIL) - hint = self._get_error_field(DiagField.MESSAGE_HINT) - severity = self._get_error_field(DiagField.SEVERITY) - schema_name = self._get_error_field(DiagField.SCHEMA_NAME) - table_name = self._get_error_field(DiagField.TABLE_NAME) - column_name = self._get_error_field(DiagField.COLUMN_NAME) - datatype_name = self._get_error_field(DiagField.DATATYPE_NAME) - constraint_name = self._get_error_field(DiagField.CONSTRAINT_NAME) - context = self._get_error_field(DiagField.CONTEXT) - - position_str = self._get_error_field(DiagField.STATEMENT_POSITION) - position = int(position_str) if position_str else None - - raise LibpqError( - primary or self.error_message(), - sqlstate=sqlstate, - severity=severity, - primary=primary, - detail=detail, - hint=hint, - schema_name=schema_name, - table_name=table_name, - column_name=column_name, - datatype_name=datatype_name, - constraint_name=constraint_name, - position=position, - context=context, - ) - - def fetch_all(self): - """ - Fetch all rows and convert to Python types. - Returns a list of tuples, with values converted based on their PostgreSQL type. 
- """ - nrows = self._lib.PQntuples(self._res) - ncols = self._lib.PQnfields(self._res) - - # Get type OIDs for each column - type_oids = [self._lib.PQftype(self._res, col) for col in range(ncols)] - - results = [] - for row in range(nrows): - row_data = [] - for col in range(ncols): - if self._lib.PQgetisnull(self._res, row, col): - row_data.append(None) - else: - value = self._lib.PQgetvalue(self._res, row, col).decode() - row_data.append(_convert_pg_value(value, type_oids[col])) - results.append(tuple(row_data)) - - return results - - -class PGconn(contextlib.AbstractContextManager): - """ - Wraps a raw _PGconn_p with a more friendly interface. This is just a - stub; it's expected to grow. - """ - - def __init__( - self, - lib: ctypes.CDLL, - handle: _PGconn_p, # type: ignore[valid-type] - stack: contextlib.ExitStack, - ): - self._lib = lib - # Cleared to None on __exit__ once the connection has been finished. - self._handle: Optional[_PGconn_p] = handle # type: ignore[valid-type] - self._stack = stack - - def __exit__(self, *exc): - self._lib.PQfinish(self._handle) - self._handle = None - - def exec(self, query: str) -> PGresult: - """ - Executes a query via PQexec() and returns a PGresult. - """ - res = self._lib.PQexec(self._handle, query.encode()) - return self._stack.enter_context(PGresult(self._lib, res)) - - def sql(self, query: str): # pylint: disable=inconsistent-return-statements - """ - Executes a query and raises an exception if it fails. - Returns the query results with automatic type conversion and simplification. - For commands that don't return data (INSERT, UPDATE, etc.), returns None. - - Examples: - - SELECT 1 -> 1 - - SELECT 1, 2 -> (1, 2) - - SELECT * FROM generate_series(1, 3) -> [1, 2, 3] - - SELECT * FROM (VALUES (1, 'a'), (2, 'b')) t -> [(1, 'a'), (2, 'b')] - - CREATE TABLE ... -> None - - INSERT INTO ... 
-> None - """ - res = self.exec(query) - status = res.status() - - if status == ExecStatus.PGRES_COMMAND_OK: - return None - if status == ExecStatus.PGRES_TUPLES_OK: - results = res.fetch_all() - return simplify_query_results(results) - # PGRES_FATAL_ERROR and anything else: raise (raise_error is NoReturn). - res.raise_error() - - -def connstr(opts: Dict[str, Any]) -> str: - """ - Flattens the provided options into a libpq connection string. Values - are converted to str and quoted/escaped as necessary. - """ - settings = [] - - for k, v in opts.items(): - v = str(v) - if not v: - v = "''" - else: - v = v.replace("\\", "\\\\") - v = v.replace("'", "\\'") - - if " " in v: - v = f"'{v}'" - - settings.append(f"{k}={v}") - - return " ".join(settings) - - -def connect( - libpq_handle: ctypes.CDLL, - stack: contextlib.ExitStack, - remaining_timeout_fn: Callable[[], float], - **opts, -) -> PGconn: - """ - Connects to a server, using the given connection options, and - returns a PGconn object wrapping the connection handle. A - failure will raise LibpqError. - - Connections honor PG_TEST_TIMEOUT_DEFAULT unless connect_timeout is - explicitly overridden in opts. - - Args: - libpq_handle: ctypes.CDLL handle to libpq library - stack: ExitStack for managing connection cleanup - remaining_timeout_fn: Function that returns remaining timeout in seconds - **opts: Connection options (host, port, dbname, etc.) 
- - Returns: - PGconn: Connected database connection - - Raises: - LibpqError: If connection fails - """ - - if "connect_timeout" not in opts: - t = int(remaining_timeout_fn()) - opts["connect_timeout"] = max(t, 1) - - conn_p = libpq_handle.PQconnectdb(connstr(opts).encode()) - - # Check connection status before adding to stack - if libpq_handle.PQstatus(conn_p) != ConnectionStatus.CONNECTION_OK: - error_msg = libpq_handle.PQerrorMessage(conn_p).decode() - # Manually close the failed connection - libpq_handle.PQfinish(conn_p) - raise LibpqError(error_msg) - - # Connection succeeded - add to stack for cleanup - conn = stack.enter_context(PGconn(libpq_handle, conn_p, stack=stack)) - return conn diff --git a/src/test/pytest/libpq/constants.py b/src/test/pytest/libpq/constants.py new file mode 100644 index 0000000000000..f5dc5ce388874 --- /dev/null +++ b/src/test/pytest/libpq/constants.py @@ -0,0 +1,72 @@ +# Copyright (c) 2025, PostgreSQL Global Development Group + +"""ctypes declarations and libpq enums for the in-process binding. + +The opaque ``PGconn``/``PGresult`` C structs and their pointer types, plus the +status enumerations from libpq-fe.h and the PG_DIAG_* field identifiers from +postgres_ext.h. Kept in one place so the rest of the libpq layer (findlib, +result, session) shares a single set of type declarations.
+""" + +import ctypes +import enum + + +# PG_DIAG field identifiers from postgres_ext.h +class DiagField(enum.IntEnum): + """PG_DIAG_* field identifiers used with PQresultErrorField().""" + + SEVERITY = ord("S") + SEVERITY_NONLOCALIZED = ord("V") + SQLSTATE = ord("C") + MESSAGE_PRIMARY = ord("M") + MESSAGE_DETAIL = ord("D") + MESSAGE_HINT = ord("H") + STATEMENT_POSITION = ord("P") + INTERNAL_POSITION = ord("p") + INTERNAL_QUERY = ord("q") + CONTEXT = ord("W") + SCHEMA_NAME = ord("s") + TABLE_NAME = ord("t") + COLUMN_NAME = ord("c") + DATATYPE_NAME = ord("d") + CONSTRAINT_NAME = ord("n") + SOURCE_FILE = ord("F") + SOURCE_LINE = ord("L") + SOURCE_FUNCTION = ord("R") + + +class ConnectionStatus(enum.IntEnum): + """PostgreSQL connection status codes from libpq.""" + + CONNECTION_OK = 0 + CONNECTION_BAD = 1 + + +class ExecStatus(enum.IntEnum): + """PostgreSQL result status codes from PQresultStatus.""" + + PGRES_EMPTY_QUERY = 0 + PGRES_COMMAND_OK = 1 + PGRES_TUPLES_OK = 2 + PGRES_COPY_OUT = 3 + PGRES_COPY_IN = 4 + PGRES_BAD_RESPONSE = 5 + PGRES_NONFATAL_ERROR = 6 + PGRES_FATAL_ERROR = 7 + PGRES_COPY_BOTH = 8 + PGRES_SINGLE_TUPLE = 9 + PGRES_PIPELINE_SYNC = 10 + PGRES_PIPELINE_ABORTED = 11 + + +class _PGconn(ctypes.Structure): + pass + + +class _PGresult(ctypes.Structure): + pass + + +_PGconn_p = ctypes.POINTER(_PGconn) +_PGresult_p = ctypes.POINTER(_PGresult) diff --git a/src/test/pytest/libpq/errors.py b/src/test/pytest/libpq/errors.py index a43b5f4ab01bb..b057677bf0ced 100644 --- a/src/test/pytest/libpq/errors.py +++ b/src/test/pytest/libpq/errors.py @@ -59,3 +59,102 @@ def sqlstate_class(self) -> Optional[str]: if self.sqlstate and len(self.sqlstate) >= 2: return self.sqlstate[:2] return None + + +# Named subclasses for the SQLSTATEs tests most often assert on, so a test can +# write ``with pytest.raises(QueryCanceled):`` instead of catching the generic +# LibpqError and then checking ``.sqlstate``. 
Each maps to its five-character +# SQLSTATE; for_sqlstate() picks the right class when an error is raised. +class SyntaxErrorState(LibpqError): + """42601 -- syntax_error. (Named to avoid shadowing the builtin SyntaxError.)""" + + +class UndefinedTable(LibpqError): + """42P01 -- undefined_table.""" + + +class UndefinedColumn(LibpqError): + """42703 -- undefined_column.""" + + +class InsufficientPrivilege(LibpqError): + """42501 -- insufficient_privilege.""" + + +class UniqueViolation(LibpqError): + """23505 -- unique_violation.""" + + +class ForeignKeyViolation(LibpqError): + """23503 -- foreign_key_violation.""" + + +class NotNullViolation(LibpqError): + """23502 -- not_null_violation.""" + + +class CheckViolation(LibpqError): + """23514 -- check_violation.""" + + +class SerializationFailure(LibpqError): + """40001 -- serialization_failure.""" + + +class DeadlockDetected(LibpqError): + """40P01 -- deadlock_detected.""" + + +class QueryCanceled(LibpqError): + """57014 -- query_canceled.""" + + +class AdminShutdown(LibpqError): + """57P01 -- admin_shutdown.""" + + +class CrashShutdown(LibpqError): + """57P02 -- crash_shutdown.""" + + +class CannotConnectNow(LibpqError): + """57P03 -- cannot_connect_now.""" + + +class ReadOnlySqlTransaction(LibpqError): + """25006 -- read_only_sql_transaction.""" + + +class ObjectInUse(LibpqError): + """55006 -- object_in_use.""" + + +# SQLSTATE -> exception subclass. Anything not listed raises a plain LibpqError.
+_SQLSTATE_EXCEPTIONS = { + "42601": SyntaxErrorState, + "42P01": UndefinedTable, + "42703": UndefinedColumn, + "42501": InsufficientPrivilege, + "23505": UniqueViolation, + "23503": ForeignKeyViolation, + "23502": NotNullViolation, + "23514": CheckViolation, + "40001": SerializationFailure, + "40P01": DeadlockDetected, + "57014": QueryCanceled, + "57P01": AdminShutdown, + "57P02": CrashShutdown, + "57P03": CannotConnectNow, + "25006": ReadOnlySqlTransaction, + "55006": ObjectInUse, +} + + +def for_sqlstate(sqlstate: Optional[str]) -> type: + """Return the LibpqError subclass for *sqlstate*, or LibpqError itself. + + Used when raising a SQL error so callers can match on the specific + condition (e.g. ``pytest.raises(QueryCanceled)``) while still catching the + base LibpqError/PgSqlError when they want any failure. + """ + return _SQLSTATE_EXCEPTIONS.get(sqlstate or "", LibpqError) diff --git a/src/test/pytest/libpq/findlib.py b/src/test/pytest/libpq/findlib.py new file mode 100644 index 0000000000000..e51011950a2ab --- /dev/null +++ b/src/test/pytest/libpq/findlib.py @@ -0,0 +1,142 @@ +# Copyright (c) 2025, PostgreSQL Global Development Group + +"""Locate and load libpq at runtime for the in-process ctypes binding. + +The framework dlopens libpq and calls it directly, so it must find the right +library for the build under test and confirm the interpreter shares its ABI +before loading (a 64-bit Python against a 32-bit libpq otherwise fails every +test). load_libpq_handle() returns a CDLL with the PQ* prototypes initialized. 
+""" + +import ctypes +import os +import platform + +from .constants import _PGconn_p, _PGresult_p + + +def _libpq_path(libdir, bindir): + """Return the platform-specific full path to libpq for this build.""" + system = platform.system() + if system in ("Linux", "FreeBSD", "NetBSD", "OpenBSD"): + return os.path.join(libdir, "libpq.so.5") + if system == "Darwin": + return os.path.join(libdir, "libpq.5.dylib") + if system == "Windows": + # On Windows, libpq.dll is confusingly in bindir, not libdir. + return os.path.join(bindir, "libpq.dll") + raise AssertionError("the libpq fixture must be updated for {}".format(system)) + + +def _elf_class(path): + """Return 1 (ELFCLASS32), 2 (ELFCLASS64), or None if path is not ELF.""" + try: + with open(path, "rb") as fh: + ident = fh.read(5) + except OSError: + return None + if ident[:4] != b"\x7fELF": + return None + return ident[4] # e_ident[EI_CLASS]: 1 = 32-bit, 2 = 64-bit + + +def libpq_abi_skip_reason(libdir, bindir): + """Return a reason to skip if this Python cannot load the build's libpq. + + The framework loads libpq in-process via ctypes, so the interpreter and the + library must share an ABI. The common mismatch is a 64-bit Python against a + 32-bit libpq (meson's -m32 build), which otherwise fails every test with + OSError: wrong ELF class. Detect it by reading the library's ELF header + rather than dlopen()ing it -- a trial dlopen of an ASan-instrumented libpq + would abort the process, not raise. Returns None when the ABI matches, when + libpq cannot be located, or when the file is not ELF (macOS/Windows). 
+ + Co-authored-by: Andrew Dunstan + """ + try: + path = _libpq_path(libdir, bindir) + except AssertionError: + return None + elf_class = _elf_class(path) + if elf_class is None: + return None + py_bits = ctypes.sizeof(ctypes.c_void_p) * 8 + lib_bits = 64 if elf_class == 2 else 32 + if py_bits != lib_bits: + return ( + "{py}-bit Python cannot load {lib}-bit libpq ({path}); the " + "in-process libpq framework needs a {lib}-bit interpreter".format( + py=py_bits, lib=lib_bits, path=path + ) + ) + return None + + +def load_libpq_handle(libdir, bindir): + """ + Loads a ctypes handle for libpq. Some common function prototypes are + initialized for general use. + """ + libpq_path = _libpq_path(libdir, bindir) + lib = ctypes.CDLL(libpq_path) + + # + # Function Prototypes + # + + lib.PQconnectdb.restype = _PGconn_p + lib.PQconnectdb.argtypes = [ctypes.c_char_p] + + lib.PQstatus.restype = ctypes.c_int + lib.PQstatus.argtypes = [_PGconn_p] + + lib.PQexec.restype = _PGresult_p + lib.PQexec.argtypes = [_PGconn_p, ctypes.c_char_p] + + lib.PQresultStatus.restype = ctypes.c_int + lib.PQresultStatus.argtypes = [_PGresult_p] + + lib.PQclear.restype = None + lib.PQclear.argtypes = [_PGresult_p] + + lib.PQerrorMessage.restype = ctypes.c_char_p + lib.PQerrorMessage.argtypes = [_PGconn_p] + + lib.PQfinish.restype = None + lib.PQfinish.argtypes = [_PGconn_p] + + lib.PQresultErrorMessage.restype = ctypes.c_char_p + lib.PQresultErrorMessage.argtypes = [_PGresult_p] + + lib.PQntuples.restype = ctypes.c_int + lib.PQntuples.argtypes = [_PGresult_p] + + lib.PQnfields.restype = ctypes.c_int + lib.PQnfields.argtypes = [_PGresult_p] + + lib.PQfname.restype = ctypes.c_char_p + lib.PQfname.argtypes = [_PGresult_p, ctypes.c_int] + + lib.PQgetvalue.restype = ctypes.c_char_p + lib.PQgetvalue.argtypes = [_PGresult_p, ctypes.c_int, ctypes.c_int] + + lib.PQgetisnull.restype = ctypes.c_int + lib.PQgetisnull.argtypes = [_PGresult_p, ctypes.c_int, ctypes.c_int] + + lib.PQftype.restype = ctypes.c_uint + 
lib.PQftype.argtypes = [_PGresult_p, ctypes.c_int] + + lib.PQresultErrorField.restype = ctypes.c_char_p + lib.PQresultErrorField.argtypes = [_PGresult_p, ctypes.c_int] + + # Asynchronous notification (LISTEN/NOTIFY) handling. + lib.PQconsumeInput.restype = ctypes.c_int + lib.PQconsumeInput.argtypes = [_PGconn_p] + + lib.PQnotifies.restype = ctypes.c_void_p + lib.PQnotifies.argtypes = [_PGconn_p] + + lib.PQfreemem.restype = None + lib.PQfreemem.argtypes = [ctypes.c_void_p] + + return lib diff --git a/src/test/pytest/libpq/oids.py b/src/test/pytest/libpq/oids.py new file mode 100644 index 0000000000000..ae015dbcfb66d --- /dev/null +++ b/src/test/pytest/libpq/oids.py @@ -0,0 +1,141 @@ +# Copyright (c) 2025, PostgreSQL Global Development Group + +"""PostgreSQL type-OID registry and text-to-Python value conversion. + +Maps a column's type OID to a converter so the in-process binding can return +real Python values (int, bool, datetime, list, ...) from the text-format wire +data, mirroring what psql would print. register_type_info() extends the table; +simplify_query_results() collapses 1x1 / 1xN / Nx1 shapes for conn.sql(). +""" + +import datetime +import decimal +import json +import uuid +from typing import Any, Callable, Dict + +# Type registry - maps OID to converter function +_type_converters: Dict[int, Callable[[str], Any]] = {} +_array_to_elem_map: Dict[int, int] = {} + + +def register_type_info( + name: str, oid: int, array_oid: int, converter: Callable[[str], Any] +): + """ + Register a PostgreSQL type with its OID, array OID, and conversion function. 
+ + Usage: + register_type_info("bool", 16, 1000, lambda v: v == "t") + """ + _type_converters[oid] = converter + if array_oid is not None: + _array_to_elem_map[array_oid] = oid + + +def _parse_array(value: str, elem_oid: int): + """Parse PostgreSQL array syntax into nested Python lists.""" + stack: list[list] = [] + current_element: list[str] = [] + in_quotes = False + was_quoted = False + pos = 0 + + while pos < len(value): + char = value[pos] + + if in_quotes: + if char == "\\": + next_char = value[pos + 1] + if next_char not in '"\\': + raise NotImplementedError('Only \\" and \\\\ escapes are supported') + current_element.append(next_char) + pos += 2 + continue + if char == '"': + in_quotes = False + else: + current_element.append(char) + elif char == '"': + in_quotes = True + was_quoted = True + elif char == "{": + stack.append([]) + elif char in ",}": + if current_element or was_quoted: + elem = "".join(current_element) + if not was_quoted and elem == "NULL": + stack[-1].append(None) + else: + stack[-1].append(_convert_pg_value(elem, elem_oid)) + current_element = [] + was_quoted = False + if char == "}": + completed = stack.pop() + if not stack: + return completed + stack[-1].append(completed) + elif char != " ": + current_element.append(char) + pos += 1 + + raise ValueError(f"Malformed array literal: {value}") + + +def _convert_pg_value(value: str, type_oid: int) -> Any: + """ + Convert PostgreSQL string value to appropriate Python type based on OID. + Uses the registered type converters from register_type_info(). 
+ """ + # Check if it's an array type + if type_oid in _array_to_elem_map: + elem_oid = _array_to_elem_map[type_oid] + return _parse_array(value, elem_oid) + + # Use registered converter if available + converter = _type_converters.get(type_oid) + if converter: + return converter(value) + + # Unknown types - return as string + return value + + +def simplify_query_results(results) -> Any: + """ + Simplify the results of a query so that the caller doesn't have to unpack + lists and tuples of length 1. + """ + if len(results) == 1: + row = results[0] + if len(row) == 1: + # If there's only a single cell, just return the value + return row[0] + # If there's only a single row, just return that row + return row + + if len(results) != 0 and len(results[0]) == 1: + # If there's only a single column, return an array of values + return [row[0] for row in results] + + # if there are multiple rows and columns, return the results as is + return results + + +# Register standard PostgreSQL types that we'll likely encounter in tests +register_type_info("bool", 16, 1000, lambda v: v == "t") +register_type_info("int2", 21, 1005, int) +register_type_info("int4", 23, 1007, int) +register_type_info("int8", 20, 1016, int) +register_type_info("float4", 700, 1021, float) +register_type_info("float8", 701, 1022, float) +register_type_info("numeric", 1700, 1231, decimal.Decimal) +register_type_info("text", 25, 1009, str) +register_type_info("varchar", 1043, 1015, str) +register_type_info("date", 1082, 1182, datetime.date.fromisoformat) +register_type_info("time", 1083, 1183, datetime.time.fromisoformat) +register_type_info("timestamp", 1114, 1115, datetime.datetime.fromisoformat) +register_type_info("timestamptz", 1184, 1185, datetime.datetime.fromisoformat) +register_type_info("uuid", 2950, 2951, uuid.UUID) +register_type_info("json", 114, 199, json.loads) +register_type_info("jsonb", 3802, 3807, json.loads) diff --git a/src/test/pytest/libpq/pgnotify.py b/src/test/pytest/libpq/pgnotify.py 
new file mode 100644 index 0000000000000..645e80ed22e6c --- /dev/null +++ b/src/test/pytest/libpq/pgnotify.py @@ -0,0 +1,47 @@ +# Copyright (c) 2025, PostgreSQL Global Development Group + +"""The PGnotify struct and a helper to read a LISTEN/NOTIFY notification. + +PQnotifies() returns a pointer to a heap-allocated PGnotify that the caller must +free with PQfreemem(); read_notification() casts the pointer to read the fields, +decodes the strings while the memory is still valid, then frees it. + +Adopted from Andrew Dunstan's pgnotify module: it lets the in-process libpq +channel (PGconn) consume asynchronous notifications, which the psql channel gets +for free from psql's text echo. +""" + +import ctypes + + +class PGnotify(ctypes.Structure): + """typedef struct pgNotify { char *relname; int be_pid; char *extra; }.""" + + _fields_ = [ + ("relname", ctypes.c_char_p), # notification channel name + ("be_pid", ctypes.c_int), # PID of the notifying backend + ("extra", ctypes.c_char_p), # notification payload string + ] + + +_PGnotify_p = ctypes.POINTER(PGnotify) + + +def read_notification(lib, raw): + """Turn the raw PQnotifies pointer *raw* into a dict and free it. + + Returns ``{"channel", "pid", "payload"}`` or ``None`` if *raw* is NULL. + """ + if not raw: + return None + notify = ctypes.cast(raw, _PGnotify_p).contents + # Decode while the memory is still valid (before PQfreemem). + result = { + "channel": ( + notify.relname.decode("utf-8", "replace") if notify.relname else None + ), + "pid": notify.be_pid, + "payload": (notify.extra.decode("utf-8", "replace") if notify.extra else None), + } + lib.PQfreemem(ctypes.c_void_p(raw)) + return result diff --git a/src/test/pytest/libpq/result.py b/src/test/pytest/libpq/result.py new file mode 100644 index 0000000000000..7fdb725a96da2 --- /dev/null +++ b/src/test/pytest/libpq/result.py @@ -0,0 +1,147 @@ +# Copyright (c) 2025, PostgreSQL Global Development Group + +"""The libpq result wrapper and its structured form. 
+ +``PGresult`` wraps a raw PGresult pointer (status, error fields, row fetch). +``ResultData`` is the structured snapshot a query returns -- status plus the +column ``names`` and ``types`` (OIDs) and the converted ``rows`` -- so callers +can introspect a result's shape, not just its values. The column metadata is +adopted from Andrew Dunstan's ResultData. +""" + +import contextlib +import ctypes +from dataclasses import dataclass, field +from typing import List, NoReturn, Optional, Tuple + +from .constants import DiagField, ExecStatus, _PGresult_p +from .errors import LibpqError, for_sqlstate +from .oids import _convert_pg_value + + +@dataclass +class ResultData: + """Structured form of a successful query result. + + Carries the column ``names`` and ``types`` (PostgreSQL type OIDs) alongside + the converted ``rows``, so a test can assert on a result's shape and column + typing rather than only its values. + + Co-authored-by: Andrew Dunstan + """ + + status: int + names: List[str] = field(default_factory=list) + types: List[int] = field(default_factory=list) + rows: List[Tuple] = field(default_factory=list) + + +class PGresult(contextlib.AbstractContextManager): + """Wraps a raw _PGresult_p with a more friendly interface.""" + + def __init__(self, lib: ctypes.CDLL, res: _PGresult_p): # type: ignore[valid-type] + self._lib = lib + # Cleared to None on __exit__ once the result has been freed. 
+ self._res: Optional[_PGresult_p] = res # type: ignore[valid-type] + + def __exit__(self, *exc): + self._lib.PQclear(self._res) + self._res = None + + def status(self) -> ExecStatus: + return ExecStatus(self._lib.PQresultStatus(self._res)) + + def error_message(self): + """Returns the error message associated with this result.""" + msg = self._lib.PQresultErrorMessage(self._res) + return msg.decode() if msg else "" + + def _get_error_field(self, field_: DiagField) -> Optional[str]: + """Get an error field from the result using PQresultErrorField.""" + val = self._lib.PQresultErrorField(self._res, int(field_)) + return val.decode() if val else None + + def raise_error(self) -> NoReturn: + """ + Raises LibpqError (or its SQLSTATE-specific subclass) with diagnostic + information from the result. + """ + if not self._res: + raise LibpqError("query failed: out of memory or connection lost") + + sqlstate = self._get_error_field(DiagField.SQLSTATE) + primary = self._get_error_field(DiagField.MESSAGE_PRIMARY) + detail = self._get_error_field(DiagField.MESSAGE_DETAIL) + hint = self._get_error_field(DiagField.MESSAGE_HINT) + severity = self._get_error_field(DiagField.SEVERITY) + schema_name = self._get_error_field(DiagField.SCHEMA_NAME) + table_name = self._get_error_field(DiagField.TABLE_NAME) + column_name = self._get_error_field(DiagField.COLUMN_NAME) + datatype_name = self._get_error_field(DiagField.DATATYPE_NAME) + constraint_name = self._get_error_field(DiagField.CONSTRAINT_NAME) + context = self._get_error_field(DiagField.CONTEXT) + + position_str = self._get_error_field(DiagField.STATEMENT_POSITION) + position = int(position_str) if position_str else None + + raise for_sqlstate(sqlstate)( + primary or self.error_message(), + sqlstate=sqlstate, + severity=severity, + primary=primary, + detail=detail, + hint=hint, + schema_name=schema_name, + table_name=table_name, + column_name=column_name, + datatype_name=datatype_name, + constraint_name=constraint_name, + 
position=position, + context=context, + ) + + def column_names(self) -> List[str]: + """Return the result's column names, in order.""" + ncols = self._lib.PQnfields(self._res) + return [self._lib.PQfname(self._res, col).decode() for col in range(ncols)] + + def column_types(self) -> List[int]: + """Return the result's column type OIDs, in order.""" + ncols = self._lib.PQnfields(self._res) + return [self._lib.PQftype(self._res, col) for col in range(ncols)] + + def fetch_all(self) -> List[Tuple]: + """ + Fetch all rows and convert to Python types. + Returns a list of tuples, with values converted based on their PostgreSQL type. + """ + nrows = self._lib.PQntuples(self._res) + ncols = self._lib.PQnfields(self._res) + + # Get type OIDs for each column + type_oids = [self._lib.PQftype(self._res, col) for col in range(ncols)] + + results = [] + for row in range(nrows): + row_data: list = [] + for col in range(ncols): + if self._lib.PQgetisnull(self._res, row, col): + row_data.append(None) + else: + value = self._lib.PQgetvalue(self._res, row, col).decode() + row_data.append(_convert_pg_value(value, type_oids[col])) + results.append(tuple(row_data)) + + return results + + def data(self) -> ResultData: + """Return a ResultData snapshot: status, column names/types, and rows.""" + status = self.status() + if status == ExecStatus.PGRES_TUPLES_OK: + return ResultData( + status=int(status), + names=self.column_names(), + types=self.column_types(), + rows=self.fetch_all(), + ) + return ResultData(status=int(status)) diff --git a/src/test/pytest/libpq/session.py b/src/test/pytest/libpq/session.py new file mode 100644 index 0000000000000..acb744b08971a --- /dev/null +++ b/src/test/pytest/libpq/session.py @@ -0,0 +1,171 @@ +# Copyright (c) 2025, PostgreSQL Global Development Group + +"""The in-process libpq connection (Session) and connect()/connstr() helpers. 
+ +``PGconn`` wraps a live connection: ``exec()`` returns the raw result, ``sql()`` +returns simplified Python values (1x1 -> scalar, Nx1 -> list, ...), and +``query()`` returns a structured :class:`ResultData` with column names/types. +All raise the SQLSTATE-specific :class:`LibpqError` subclass on failure. +""" + +import contextlib +import ctypes +from typing import Any, Callable, Dict + +from .constants import ConnectionStatus, ExecStatus, _PGconn_p +from .errors import LibpqError +from .oids import simplify_query_results +from .pgnotify import read_notification +from .result import PGresult, ResultData + + +class PGconn(contextlib.AbstractContextManager): + """Wraps a raw _PGconn_p with a more friendly interface.""" + + def __init__( + self, + lib: ctypes.CDLL, + handle: _PGconn_p, # type: ignore[valid-type] + stack: contextlib.ExitStack, + ): + self._lib = lib + # Cleared to None on __exit__ once the connection has been finished. + self._handle = handle + self._stack = stack + + def __exit__(self, *exc): + self._lib.PQfinish(self._handle) + self._handle = None + + def exec(self, query: str) -> PGresult: + """ + Executes a query via PQexec() and returns a PGresult. + """ + res = self._lib.PQexec(self._handle, query.encode()) + return self._stack.enter_context(PGresult(self._lib, res)) + + def sql(self, query: str): # pylint: disable=inconsistent-return-statements + """ + Executes a query and raises an exception if it fails. + Returns the query results with automatic type conversion and simplification. + For commands that don't return data (INSERT, UPDATE, etc.), returns None. + + Examples: + - SELECT 1 -> 1 + - SELECT 1, 2 -> (1, 2) + - SELECT * FROM generate_series(1, 3) -> [1, 2, 3] + - SELECT * FROM (VALUES (1, 'a'), (2, 'b')) t -> [(1, 'a'), (2, 'b')] + - CREATE TABLE ... -> None + - INSERT INTO ... 
-> None + """ + res = self.exec(query) + status = res.status() + + if status == ExecStatus.PGRES_COMMAND_OK: + return None + if status == ExecStatus.PGRES_TUPLES_OK: + results = res.fetch_all() + return simplify_query_results(results) + # PGRES_FATAL_ERROR and anything else: raise (raise_error is NoReturn). + res.raise_error() + + def query(self, query: str) -> ResultData: + """Execute *query* and return a structured ResultData. + + Unlike sql(), this never simplifies: the caller gets the column names, + column type OIDs, and the full row list, which is what tests asserting + on result shape or column typing need. Raises on a failed status. + """ + res = self.exec(query) + status = res.status() + if status not in (ExecStatus.PGRES_TUPLES_OK, ExecStatus.PGRES_COMMAND_OK): + res.raise_error() + return res.data() + + def get_notification(self): + """Return one pending LISTEN/NOTIFY notification, or None. + + Consumes any input waiting on the socket first, then pops a single + notification. The result is ``{"channel", "pid", "payload"}``. This is + the in-process libpq equivalent of reading psql's notification echo. + """ + self._lib.PQconsumeInput(self._handle) + raw = self._lib.PQnotifies(self._handle) + return read_notification(self._lib, raw) + + def get_all_notifications(self): + """Return all currently-pending notifications as a list of dicts.""" + notifications = [] + while True: + notify = self.get_notification() + if notify is None: + return notifications + notifications.append(notify) + + +def connstr(opts: Dict[str, Any]) -> str: + """ + Flattens the provided options into a libpq connection string. Values + are converted to str and quoted/escaped as necessary. 
+ """ + settings = [] + + for k, v in opts.items(): + v = str(v) + if not v: + v = "''" + else: + v = v.replace("\\", "\\\\") + v = v.replace("'", "\\'") + + if " " in v: + v = f"'{v}'" + + settings.append(f"{k}={v}") + + return " ".join(settings) + + +def connect( + libpq_handle: ctypes.CDLL, + stack: contextlib.ExitStack, + remaining_timeout_fn: Callable[[], float], + **opts, +) -> PGconn: + """ + Connects to a server, using the given connection options, and + returns a PGconn object wrapping the connection handle. A + failure will raise LibpqError. + + Connections honor PG_TEST_TIMEOUT_DEFAULT unless connect_timeout is + explicitly overridden in opts. + + Args: + libpq_handle: ctypes.CDLL handle to libpq library + stack: ExitStack for managing connection cleanup + remaining_timeout_fn: Function that returns remaining timeout in seconds + **opts: Connection options (host, port, dbname, etc.) + + Returns: + PGconn: Connected database connection + + Raises: + LibpqError: If connection fails + """ + + if "connect_timeout" not in opts: + t = int(remaining_timeout_fn()) + opts["connect_timeout"] = max(t, 1) + + conn_p = libpq_handle.PQconnectdb(connstr(opts).encode()) + + # Check connection status before adding to stack + if libpq_handle.PQstatus(conn_p) != ConnectionStatus.CONNECTION_OK: + error_msg = libpq_handle.PQerrorMessage(conn_p).decode() + # Manually close the failed connection + libpq_handle.PQfinish(conn_p) + raise LibpqError(error_msg) + + # Connection succeeded - add to stack for cleanup + conn = stack.enter_context(PGconn(libpq_handle, conn_p, stack=stack)) + return conn diff --git a/src/test/pytest/pypg/__init__.py b/src/test/pytest/pypg/__init__.py index 809e736ffc822..8fecbb33c5561 100644 --- a/src/test/pytest/pypg/__init__.py +++ b/src/test/pytest/pypg/__init__.py @@ -6,7 +6,25 @@ test_timeout_default, ) from .command import CommandResult, PgBin, ProgramResult -from .errors import PgError, PgServerError, PgSqlError, LibpqError +from .errors import ( 
+ PgError, + PgServerError, + PgSqlError, + LibpqError, + QueryCanceled, + UniqueViolation, + ForeignKeyViolation, + CheckViolation, + NotNullViolation, + InsufficientPrivilege, + UndefinedTable, + UndefinedColumn, + SerializationFailure, + DeadlockDetected, + AdminShutdown, + CrashShutdown, + CannotConnectNow, +) from .fake import faker, meaningful_text, rand_str from .modes import CatchupMode, SlotCatchupMode, StopMode from .kerberos import KerberosServer @@ -14,6 +32,7 @@ from .sqlresult import SqlResult from .util import ( wait_for_file, + wait_until, compare_files, check_pg_config, scan_server_header, @@ -45,6 +64,19 @@ "PgServerError", "PgSqlError", "LibpqError", + "QueryCanceled", + "UniqueViolation", + "ForeignKeyViolation", + "CheckViolation", + "NotNullViolation", + "InsufficientPrivilege", + "UndefinedTable", + "UndefinedColumn", + "SerializationFailure", + "DeadlockDetected", + "AdminShutdown", + "CrashShutdown", + "CannotConnectNow", "append_to_file", "check_mode_recursive", "chmod_recursive", @@ -55,4 +87,5 @@ "scan_server_header", "compare_files", "wait_for_file", + "wait_until", ] diff --git a/src/test/pytest/pypg/errors.py b/src/test/pytest/pypg/errors.py index 9574204abc4f1..f1c29e8b1068f 100644 --- a/src/test/pytest/pypg/errors.py +++ b/src/test/pytest/pypg/errors.py @@ -19,7 +19,22 @@ from __future__ import annotations -from libpq.errors import LibpqError +from libpq.errors import ( + LibpqError, + QueryCanceled, + UniqueViolation, + ForeignKeyViolation, + CheckViolation, + NotNullViolation, + InsufficientPrivilege, + UndefinedTable, + UndefinedColumn, + SerializationFailure, + DeadlockDetected, + AdminShutdown, + CrashShutdown, + CannotConnectNow, +) # A failed SQL statement, regardless of execution channel (libpq or psql). 
PgSqlError = LibpqError @@ -39,4 +54,22 @@ class PgServerError(PgError): """ -__all__ = ["PgError", "PgServerError", "PgSqlError", "LibpqError"] +__all__ = [ + "PgError", + "PgServerError", + "PgSqlError", + "LibpqError", + "QueryCanceled", + "UniqueViolation", + "ForeignKeyViolation", + "CheckViolation", + "NotNullViolation", + "InsufficientPrivilege", + "UndefinedTable", + "UndefinedColumn", + "SerializationFailure", + "DeadlockDetected", + "AdminShutdown", + "CrashShutdown", + "CannotConnectNow", +] diff --git a/src/test/pytest/pyt/test_errors.py b/src/test/pytest/pyt/test_errors.py index 771fe8f76e362..3adb8b96b4436 100644 --- a/src/test/pytest/pyt/test_errors.py +++ b/src/test/pytest/pyt/test_errors.py @@ -5,7 +5,8 @@ """ import pytest -from libpq import LibpqError +import pypg +from libpq import LibpqError, QueryCanceled, SyntaxErrorState, UniqueViolation def test_syntax_error(conn): @@ -32,3 +33,47 @@ def test_unique_violation(conn): assert err.table_name == "test_uv" assert err.constraint_name == "test_uv_pk" assert err.detail == "Key (id)=(1) already exists." 
+ + +def test_named_exception_subclass(conn): + """A specific SQLSTATE raises its named subclass, matchable directly.""" + with pytest.raises(SyntaxErrorState): + conn.sql("SELEC 1") + with pytest.raises(UniqueViolation): + conn.sql( + "CREATE TEMP TABLE t (id int PRIMARY KEY);" + "INSERT INTO t VALUES (1); INSERT INTO t VALUES (1);" + ) + + +def test_named_exception_is_libpqerror(conn): + """A named subclass is still catchable as the base LibpqError/PgSqlError.""" + with pytest.raises(LibpqError): + conn.sql("SELEC 1") + with pytest.raises(pypg.PgSqlError): + conn.sql("SELEC 1") + + +def test_query_canceled_matches(conn): + """statement_timeout cancellation raises QueryCanceled (SQLSTATE 57014).""" + conn.sql("SET statement_timeout = '50ms'") + with pytest.raises(QueryCanceled) as exc_info: + conn.sql("SELECT pg_sleep(5)") + assert exc_info.value.sqlstate == "57014" + + +def test_wait_until_breaks(pg): + """wait_until polls until the body breaks, without timing out.""" + seen = [] + for _ in pypg.wait_until("never reached", timeout=5, interval=0.01): + seen.append(pg.sql("SELECT 1").scalar()) + if seen[-1] == "1": + break + assert seen[-1] == "1" + + +def test_wait_until_times_out(): + """wait_until raises TimeoutError when the body never breaks.""" + with pytest.raises(TimeoutError): + for _ in pypg.wait_until("deliberate timeout", timeout=0.2, interval=0.05): + pass diff --git a/src/test/pytest/pyt/test_query_helpers.py b/src/test/pytest/pyt/test_query_helpers.py index abcd90842142b..94d524fcc5aef 100644 --- a/src/test/pytest/pyt/test_query_helpers.py +++ b/src/test/pytest/pyt/test_query_helpers.py @@ -345,3 +345,37 @@ def test_array_with_null(conn): result = conn.sql("SELECT ARRAY[1, NULL, 3]") assert result == [1, None, 3] + + +def test_query_returns_result_data(conn): + """conn.query() returns a structured ResultData with column metadata.""" + data = conn.query("SELECT 1 AS a, 'x'::text AS b") + assert data.names == ["a", "b"] + assert data.types == [23, 25] 
# int4, text OIDs + assert data.rows == [(1, "x")] + + +def test_query_no_simplification(conn): + """query() never collapses shape: a single scalar stays a 1x1 row list.""" + data = conn.query("SELECT 42") + assert data.rows == [(42,)] + assert data.names == ["?column?"] + + +def test_listen_notify(conn): + """The libpq channel can receive asynchronous LISTEN/NOTIFY messages.""" + conn.sql("LISTEN chan") + conn.sql("NOTIFY chan, 'hello'") + notify = conn.get_notification() + assert notify is not None + assert notify["channel"] == "chan" + assert notify["payload"] == "hello" + assert isinstance(notify["pid"], int) + + +def test_get_all_notifications(conn): + """get_all_notifications drains every pending notification in order.""" + conn.sql("LISTEN chan") + conn.sql("NOTIFY chan, 'a'; NOTIFY chan, 'b'") + payloads = [n["payload"] for n in conn.get_all_notifications()] + assert payloads == ["a", "b"] From ba7ea2ab715d96c799820e682a9d80be697a76ec Mon Sep 17 00:00:00 2001 From: Greg Burd Date: Mon, 15 Jun 2026 14:44:22 -0400 Subject: [PATCH 33/36] pytest: address code-review fidelity findings in ported tests Fix issues flagged by the automated PR review: - Use query_safe (not query) where the Perl original does, so an ERROR/FATAL/ PANIC is not silently swallowed: test_007_catcache_inval (the bug-under-test), test_001_multixact, pg_visibility/001, amcheck/002_cic. - worker_spi/001: compare safe_psql result to "t" instead of Python truthiness ("f" is a truthy non-empty string). - pg_prewarm/001: restore the "prefetch is not supported" OR branch, and run the post-GRANT prewarm as test_user (psql_capture --username) so the GRANT is actually validated rather than vacuously passing as superuser. - brin/01_workitems: assert the poll_query_until calls and f-string the $count messages (were printing the literal Perl token). - brin/02_wal_consistency: use wait_for_replay_catchup (flush LSN) to match the Perl original, not wait_for_catchup (write LSN). 
- commit_ts/003_standby_2: f-string the $standby_ts diagnostic. - pg_combinebackup/011: pass has_archiving=True to match the Perl init. - pg_rewind/001: guard the unix-permission check with platform != Windows. - pg_rewind/006: use two distinct temp dirs like the original. - pg_verifybackup/009: remove the backup before the possibly_unsupported continue so the reused path is clean. - pg_ctl/001: restore the Windows-only post-start sleep. - pg_resetwal/001: comment the intentional multixact-ids hex arithmetic fix. - test_json_parser/002 and test_checksums/006: resolve the test binaries against the build bindir (PATH / node.bin.popen) instead of the ambient PATH. - test_load_balance: restart the nodes the failover test stops so the module-scoped fixture is not left inconsistent. --- contrib/amcheck/pyt/test_002_cic.py | 2 +- contrib/pg_prewarm/pyt/test_001_basic.py | 16 +++++++++++----- .../pyt/test_001_concurrent_transaction.py | 4 ++-- .../pyt/test_011_ib_truncation.py | 4 +++- src/bin/pg_ctl/pyt/test_001_start_stop.py | 7 +++++++ src/bin/pg_resetwal/pyt/test_001_basic.py | 5 +++++ src/bin/pg_rewind/pyt/test_001_basic.py | 11 ++++++++--- src/bin/pg_rewind/pyt/test_006_options.py | 4 ++-- src/bin/pg_verifybackup/pyt/test_009_extract.py | 4 ++++ src/interfaces/libpq/pyt/test_load_balance.py | 5 +++++ src/test/modules/brin/pyt/test_01_workitems.py | 12 ++++++------ .../modules/brin/pyt/test_02_wal_consistency.py | 2 +- .../modules/commit_ts/pyt/test_003_standby_2.py | 2 +- .../pyt/test_006_pgbench_single.py | 11 ++++------- .../test_json_parser/pyt/test_002_inline.py | 12 +++++++++--- .../test_misc/pyt/test_007_catcache_inval.py | 6 +++--- .../modules/test_slru/pyt/test_001_multixact.py | 2 +- .../worker_spi/pyt/test_001_worker_spi.py | 2 +- 18 files changed, 74 insertions(+), 37 deletions(-) diff --git a/contrib/amcheck/pyt/test_002_cic.py b/contrib/amcheck/pyt/test_002_cic.py index d33b20e16e6fa..0524f8fb4532a 100644 --- a/contrib/amcheck/pyt/test_002_cic.py +++ 
b/contrib/amcheck/pyt/test_002_cic.py @@ -54,7 +54,7 @@ def test_002_cic(create_pg): node.safe_psql("CREATE TABLE quebec(i int primary key)") node.safe_psql("INSERT INTO quebec SELECT i FROM generate_series(1, 2) s(i);") in_progress_h = node.background_psql("postgres") - in_progress_h.query("BEGIN; SELECT pg_current_xact_id();") + in_progress_h.query_safe("BEGIN; SELECT pg_current_xact_id();") node.safe_psql("DELETE FROM quebec WHERE i = 1;") node.safe_psql("CREATE INDEX CONCURRENTLY oscar ON quebec(i);") result = node.psql_capture( diff --git a/contrib/pg_prewarm/pyt/test_001_basic.py b/contrib/pg_prewarm/pyt/test_001_basic.py index a409def91b239..540dd70fec6b2 100644 --- a/contrib/pg_prewarm/pyt/test_001_basic.py +++ b/contrib/pg_prewarm/pyt/test_001_basic.py @@ -31,7 +31,9 @@ def test_001_basic(pg_bin, create_pg): result, ), "buffer mode succeeded" result = node.psql_capture("SELECT pg_prewarm('test', 'prefetch');") - assert re.search(r"""^[1-9][0-9]*$""", result.stdout), "prefetch mode succeeded" + assert re.search(r"""^[1-9][0-9]*$""", result.stdout) or re.search( + r"""prefetch is not supported by this build""", result.stderr + ), "prefetch mode succeeded" result = node.psql_capture( "SELECT pg_prewarm('test');", extra_params=["--username", "test_user"] ) @@ -45,15 +47,19 @@ def test_001_basic(pg_bin, create_pg): r"""permission denied for index test_idx""", result.stderr ), "pg_prewarm failed as expected" node.safe_psql("GRANT SELECT ON test TO test_user;") - result = node.safe_psql("SELECT pg_prewarm('test');") + result = node.psql_capture( + "SELECT pg_prewarm('test');", extra_params=["--username", "test_user"] + ) assert re.search( r"""^[1-9][0-9]*$""", - result, + result.stdout, ), "pg_prewarm succeeded as expected" - result = node.safe_psql("SELECT pg_prewarm('test_idx');") + result = node.psql_capture( + "SELECT pg_prewarm('test_idx');", extra_params=["--username", "test_user"] + ) assert re.search( r"""^[1-9][0-9]*$""", - result, + result.stdout, ), 
"pg_prewarm succeeded as expected" result = node.safe_psql("SELECT autoprewarm_dump_now();") assert re.search( diff --git a/contrib/pg_visibility/pyt/test_001_concurrent_transaction.py b/contrib/pg_visibility/pyt/test_001_concurrent_transaction.py index cb2278fb109ba..d494c689f6022 100644 --- a/contrib/pg_visibility/pyt/test_001_concurrent_transaction.py +++ b/contrib/pg_visibility/pyt/test_001_concurrent_transaction.py @@ -20,7 +20,7 @@ def test_001_concurrent_transaction(create_pg): standby.start() node.safe_psql("CREATE DATABASE other_database;") bsession = node.background_psql("other_database") - bsession.query("BEGIN;\n\tSELECT txid_current();") + bsession.query_safe("BEGIN;\n\tSELECT txid_current();") node.safe_psql( "CREATE EXTENSION pg_visibility;\nCREATE TABLE vacuum_test AS SELECT 42 i;\nVACUUM (disable_page_skipping) vacuum_test;" ) @@ -29,7 +29,7 @@ def test_001_concurrent_transaction(create_pg): node.wait_for_catchup(standby) result = standby.safe_psql("SELECT * FROM pg_check_visible('vacuum_test');") assert result == "", "pg_check_visible() detects no errors" - bsession.query("COMMIT;") + bsession.query_safe("COMMIT;") bsession.quit() node.stop() standby.stop() diff --git a/src/bin/pg_combinebackup/pyt/test_011_ib_truncation.py b/src/bin/pg_combinebackup/pyt/test_011_ib_truncation.py index 59f4993c14a2e..c6361deaf8860 100644 --- a/src/bin/pg_combinebackup/pyt/test_011_ib_truncation.py +++ b/src/bin/pg_combinebackup/pyt/test_011_ib_truncation.py @@ -10,7 +10,9 @@ def test_011_ib_truncation(create_pg): """Incremental backup reconstructs a relation truncated between backups.""" - primary = create_pg("primary", allows_streaming=True, start=False) + primary = create_pg( + "primary", allows_streaming=True, has_archiving=True, start=False + ) primary.append_conf("summarize_wal = on") primary.start() backup_path = primary.backup_dir diff --git a/src/bin/pg_ctl/pyt/test_001_start_stop.py b/src/bin/pg_ctl/pyt/test_001_start_stop.py index 
f383d5225e6d0..a2cb71474b87e 100644 --- a/src/bin/pg_ctl/pyt/test_001_start_stop.py +++ b/src/bin/pg_ctl/pyt/test_001_start_stop.py @@ -9,6 +9,7 @@ import os import platform +import time import pytest @@ -61,6 +62,12 @@ def test_start_stop(pg_bin, tmp_path, sockdir): "pg_ctl start", ) + if platform.system() == "Windows": + # Mirror the Perl original's `sleep 3 if ($windows_os)`: give the + # postmaster time to write postmaster.pid so the next pg_ctl start does + # not race wait_for_postmaster() against a stale/absent pid file. + time.sleep(3) + pg_bin.command_fails( ["pg_ctl", "start", "--pgdata", data], "second pg_ctl start fails" ) diff --git a/src/bin/pg_resetwal/pyt/test_001_basic.py b/src/bin/pg_resetwal/pyt/test_001_basic.py index 8ec533e48f998..6138f3129438a 100644 --- a/src/bin/pg_resetwal/pyt/test_001_basic.py +++ b/src/bin/pg_resetwal/pyt/test_001_basic.py @@ -159,6 +159,11 @@ def _test_control_overrides(pg_bin, node): mult = 32 * blcksz // 8 cmd += [ "--multixact-ids", + # The Perl original writes hex($files[0] * $mult) for the "old" value + # (numify the hex string in decimal, then hex()), which is an apparent + # quirk; we deliberately parse-as-hex then multiply (consistent with the + # pg_xact case). The two differ only when files[0] != "0000", which does + # not occur on a freshly-initialized cluster, so the value matches there. "{},{}".format( (int(files[-1], 16) + 1) * mult, 1 if int(files[0], 16) == 0 else int(files[0], 16) * mult, diff --git a/src/bin/pg_rewind/pyt/test_001_basic.py b/src/bin/pg_rewind/pyt/test_001_basic.py index ff56b4aaf8109..d6d1c6a704bfc 100644 --- a/src/bin/pg_rewind/pyt/test_001_basic.py +++ b/src/bin/pg_rewind/pyt/test_001_basic.py @@ -10,6 +10,8 @@ 'archive' source modes. 
""" +import platform + import pypg @@ -99,9 +101,12 @@ def _check_results(rt): ) rt.check_query("SELECT count(*) FROM tail_tbl", "10001", "tail-copy") rt.check_query("SELECT * FROM drop_tbl", "in primary", "drop") - assert pypg.check_mode_recursive( - rt.primary.datadir, 0o700, 0o600 - ), "check PGDATA permissions" + if platform.system() != "Windows": + # unix-style permissions are not supported on Windows (cf. the SKIP + # block in the Perl original). + assert pypg.check_mode_recursive( + rt.primary.datadir, 0o700, 0o600 + ), "check PGDATA permissions" def _run_test(rt, pg_bin, test_mode): diff --git a/src/bin/pg_rewind/pyt/test_006_options.py b/src/bin/pg_rewind/pyt/test_006_options.py index db9230d37a5a0..8d0fe3e86dea3 100644 --- a/src/bin/pg_rewind/pyt/test_006_options.py +++ b/src/bin/pg_rewind/pyt/test_006_options.py @@ -13,8 +13,8 @@ def test_006_options(pg_bin, tmp_path): pg_bin.program_help_ok("pg_rewind") pg_bin.program_version_ok("pg_rewind") pg_bin.program_options_handling_ok("pg_rewind") - primary_pgdata = tmp_path - standby_pgdata = tmp_path + primary_pgdata = tmp_path / "primary" + standby_pgdata = tmp_path / "standby" pg_bin.command_fails( [ "pg_rewind", diff --git a/src/bin/pg_verifybackup/pyt/test_009_extract.py b/src/bin/pg_verifybackup/pyt/test_009_extract.py index 88e936ed00c9b..2d8bf877bc7ac 100644 --- a/src/bin/pg_verifybackup/pyt/test_009_extract.py +++ b/src/bin/pg_verifybackup/pyt/test_009_extract.py @@ -64,6 +64,10 @@ def test_009_extract(create_pg): ) unsupported = tc.get("possibly_unsupported") if result.rc != 0 and unsupported and re.search(unsupported, result.stderr): + # Remove the (partial) backup before skipping, like the Perl + # original's unconditional rmtree: the next iteration reuses the + # same backup_path and pg_basebackup requires an empty target. 
+ shutil.rmtree(backup_path, ignore_errors=True) continue assert result.rc == 0, "backup done, compression {}".format(method) primary.command_ok( diff --git a/src/interfaces/libpq/pyt/test_load_balance.py b/src/interfaces/libpq/pyt/test_load_balance.py index 3804814738950..6d445f73aaacb 100644 --- a/src/interfaces/libpq/pyt/test_load_balance.py +++ b/src/interfaces/libpq/pyt/test_load_balance.py @@ -168,3 +168,8 @@ def test_load_balance_hosts_failover(load_balance_nodes): with nodes[2].log_contains("connection received", times=5): for _ in range(5): connect(load_balance_hosts="random") + + # Restart the nodes stopped above so the module-scoped fixture is left in a + # consistent state and this test is not coupled to running last. + nodes[0].start() + nodes[1].start() diff --git a/src/test/modules/brin/pyt/test_01_workitems.py b/src/test/modules/brin/pyt/test_01_workitems.py index 5a619fad9adfc..fa34da4f7b3c5 100644 --- a/src/test/modules/brin/pyt/test_01_workitems.py +++ b/src/test/modules/brin/pyt/test_01_workitems.py @@ -35,20 +35,20 @@ def test_01_workitems(create_pg): "insert into journal select * from generate_series(timestamp '1976-08-01', '1976-10-28', '1 day')" ) time.sleep(1) - node.poll_query_until( + assert node.poll_query_until( "select count(*) > 1 from brin_page_items(get_raw_page('brin_wi_idx', 2), 'brin_wi_idx'::regclass)", expected="t", - ) + ), "brin_wi_idx summarization completed" count = node.safe_psql( "select count(*) from brin_page_items(get_raw_page('brin_wi_idx', 2), 'brin_wi_idx'::regclass)\n\t where not placeholder;" ) - assert int(count) > 1, "$count brin_wi_idx ranges got summarized" - node.poll_query_until( + assert int(count) > 1, f"{count} brin_wi_idx ranges got summarized" + assert node.poll_query_until( "select count(*) > 1 from brin_page_items(get_raw_page('brin_packdate_idx', 2), 'brin_packdate_idx'::regclass)", expected="t", - ) + ), "brin_packdate_idx summarization completed" count = node.safe_psql( "select count(*) from 
brin_page_items(get_raw_page('brin_packdate_idx', 2), 'brin_packdate_idx'::regclass)\n\t where not placeholder;" ) - assert int(count) > 1, "$count brin_packdate_idx ranges got summarized" + assert int(count) > 1, f"{count} brin_packdate_idx ranges got summarized" node.stop() diff --git a/src/test/modules/brin/pyt/test_02_wal_consistency.py b/src/test/modules/brin/pyt/test_02_wal_consistency.py index ccbc07ae522ae..48f01fe941be7 100644 --- a/src/test/modules/brin/pyt/test_02_wal_consistency.py +++ b/src/test/modules/brin/pyt/test_02_wal_consistency.py @@ -44,4 +44,4 @@ def test_02_wal_consistency(create_pg): + "')\n\twhere resource_manager = 'BRIN' AND\n\trecord_type ILIKE '%revmap%'" ) assert int(result.stdout) >= 1 - whiskey.wait_for_catchup(charlie) + whiskey.wait_for_replay_catchup(charlie) diff --git a/src/test/modules/commit_ts/pyt/test_003_standby_2.py b/src/test/modules/commit_ts/pyt/test_003_standby_2.py index 110ffa3e1298a..bc79273924132 100644 --- a/src/test/modules/commit_ts/pyt/test_003_standby_2.py +++ b/src/test/modules/commit_ts/pyt/test_003_standby_2.py @@ -52,4 +52,4 @@ def test_003_standby_2(create_pg): standby_ts = standby.safe_psql( "SELECT ts.* FROM pg_class, pg_xact_commit_timestamp(xmin) AS ts WHERE relname = 't11'" ) - assert standby_ts != "", "standby gives valid value ($standby_ts) after promotion" + assert standby_ts != "", f"standby gives valid value ({standby_ts}) after promotion" diff --git a/src/test/modules/test_checksums/pyt/test_006_pgbench_single.py b/src/test/modules/test_checksums/pyt/test_006_pgbench_single.py index 6fe39e7f0d65d..5cbc0248a4a06 100644 --- a/src/test/modules/test_checksums/pyt/test_006_pgbench_single.py +++ b/src/test/modules/test_checksums/pyt/test_006_pgbench_single.py @@ -12,7 +12,6 @@ import os import random import re -import subprocess import pytest @@ -46,12 +45,10 @@ def _start_bg_pgbench(node): if extended and dcu.cointoss(): cmd.append("-C") cmd.append("postgres") - return subprocess.Popen( # pylint: 
disable=consider-using-with - cmd, - stdin=subprocess.DEVNULL, - stdout=subprocess.DEVNULL, - stderr=subprocess.DEVNULL, - ) + # Resolve pgbench against the node's bindir and pass its connection env, so + # the just-built pgbench is found (it is not on the ambient PATH) and a + # launch failure is not silently swallowed by the discarded stderr. + return node.bin.popen(cmd) def _flip_data_checksums(node, state): diff --git a/src/test/modules/test_json_parser/pyt/test_002_inline.py b/src/test/modules/test_json_parser/pyt/test_002_inline.py index 30f95c21e6a3d..793c6f5a27520 100644 --- a/src/test/modules/test_json_parser/pyt/test_002_inline.py +++ b/src/test/modules/test_json_parser/pyt/test_002_inline.py @@ -10,6 +10,7 @@ size with no stderr; each invalid input must fail with the expected error. """ +import os import re import subprocess @@ -111,7 +112,7 @@ def _split_nul(text): return parts -def _run_case(exe, name, json_bytes, error, tmp_path): +def _run_case(exe, name, json_bytes, error, tmp_path, env): """Run one inline case across all chunk sizes and check each result.""" chunk = min(len(json_bytes), 64) fname = tmp_path / "inline.json" @@ -120,6 +121,7 @@ def _run_case(exe, name, json_bytes, error, tmp_path): exe + ["-r", str(chunk), str(fname)], stdout=subprocess.PIPE, stderr=subprocess.PIPE, + env=env, check=False, ) stdout = _split_nul(proc.stdout.decode("latin-1")) @@ -143,8 +145,12 @@ def _run_case(exe, name, json_bytes, error, tmp_path): ) -def test_002_inline(tmp_path): +def test_002_inline(tmp_path, bindir): """Every inline JSON case parses correctly at all chunk sizes, all variants.""" + # Resolve the test_json_parser_incremental* binaries against the build's + # bindir (they are not on the ambient PATH in a build-tree run). 
+ env = dict(os.environ) + env["PATH"] = str(bindir) + os.pathsep + env.get("PATH", "") for exe in _EXES: for name, json_bytes, error in _CASES: - _run_case(exe, name, json_bytes, error, tmp_path) + _run_case(exe, name, json_bytes, error, tmp_path, env) diff --git a/src/test/modules/test_misc/pyt/test_007_catcache_inval.py b/src/test/modules/test_misc/pyt/test_007_catcache_inval.py index df047e34c2c4a..7347b069a533f 100644 --- a/src/test/modules/test_misc/pyt/test_007_catcache_inval.py +++ b/src/test/modules/test_misc/pyt/test_007_catcache_inval.py @@ -33,7 +33,7 @@ def test_007_catcache_inval(create_pg): ) psql_session = node.background_psql("postgres") psql_session2 = node.background_psql("postgres") - psql_session.query( + psql_session.query_safe( "SELECT injection_points_set_local();\n SELECT injection_points_attach('catcache-list-miss-systable-scan-started', 'wait');" ) psql_session.query_until( @@ -43,9 +43,9 @@ def test_007_catcache_inval(create_pg): node.safe_psql( "CREATE FUNCTION foofunc() RETURNS integer AS $$ SELECT 123 $$ LANGUAGE SQL" ) - psql_session2.query( + psql_session2.query_safe( "SELECT injection_points_wakeup('catcache-list-miss-systable-scan-started');\n SELECT injection_points_detach('catcache-list-miss-systable-scan-started');" ) - psql_session.query("SELECT foofunc();") + psql_session.query_safe("SELECT foofunc();") assert psql_session.quit() == 0, "" assert psql_session2.quit() == 0, "" diff --git a/src/test/modules/test_slru/pyt/test_001_multixact.py b/src/test/modules/test_slru/pyt/test_001_multixact.py index de25bf65c0652..1cc8b88ad9d99 100644 --- a/src/test/modules/test_slru/pyt/test_001_multixact.py +++ b/src/test/modules/test_slru/pyt/test_001_multixact.py @@ -21,7 +21,7 @@ def test_001_multixact(create_pg): node.safe_psql("CREATE EXTENSION injection_points") node.safe_psql("CREATE EXTENSION test_slru") bg_psql = node.background_psql("postgres") - multi1 = bg_psql.query("SELECT test_create_multixact();") + multi1 = 
bg_psql.query_safe("SELECT test_create_multixact();") node.safe_psql( "SELECT injection_points_attach('multixact-create-from-members','wait');" ) diff --git a/src/test/modules/worker_spi/pyt/test_001_worker_spi.py b/src/test/modules/worker_spi/pyt/test_001_worker_spi.py index 3d68ce7bf42b9..50a44eb41af3a 100644 --- a/src/test/modules/worker_spi/pyt/test_001_worker_spi.py +++ b/src/test/modules/worker_spi/pyt/test_001_worker_spi.py @@ -33,7 +33,7 @@ def test_001_worker_spi(create_pg): result = node.safe_psql( "SELECT count(*) > 0 from pg_wait_events where type = 'Extension' and name = 'WorkerSpiMain';" ) - assert result, '"WorkerSpiMain" is reported in pg_wait_events' + assert result == "t", '"WorkerSpiMain" is reported in pg_wait_events' node.safe_psql("CREATE DATABASE mydb;") node.safe_psql("CREATE ROLE myrole SUPERUSER LOGIN;") node.safe_psql("CREATE EXTENSION worker_spi;", dbname="mydb") From 1ed7c838c7444fce391e5472d31b2fb8caadd37c Mon Sep 17 00:00:00 2001 From: Greg Burd Date: Mon, 15 Jun 2026 14:44:42 -0400 Subject: [PATCH 34/36] pytest: framework robustness fixes from code review - libpq: PGresult.status() reports FATAL_ERROR for a NULL result (PQexec on OOM / lost connection) so sql() raises instead of silently returning None; guard connect()'s PQerrorMessage against a NULL pointer before decoding; clear the previous PGresult on the next exec() so a long poll loop on one connection does not accumulate unfreed results. - command.py: bound PgBin.result() with a timeout (default PG_TEST_TIMEOUT_DEFAULT) so a hung client program fails fast; assert pg_config succeeds in check_pg_config before using its output. - bgpsql.py: read the output buffers under the lock when formatting a timeout error; make query_safe fail on any non-empty stderr, matching PostgreSQL::Test::BackgroundPsql->query_safe (WARNING/NOTICE are fatal too). - _env.py: fix the docstring example (require_test_extras, plural). - pytest/Makefile: copyright year 1996-2026 to match the other Makefiles. 
--- src/test/pytest/Makefile | 2 +- src/test/pytest/libpq/result.py | 14 ++++++++++++-- src/test/pytest/libpq/session.py | 19 ++++++++++++++++--- src/test/pytest/pypg/_env.py | 2 +- src/test/pytest/pypg/bgpsql.py | 16 +++++++++------- src/test/pytest/pypg/command.py | 17 ++++++++++++++--- 6 files changed, 53 insertions(+), 17 deletions(-) diff --git a/src/test/pytest/Makefile b/src/test/pytest/Makefile index 2bdca96ccbee3..54e8086d24016 100644 --- a/src/test/pytest/Makefile +++ b/src/test/pytest/Makefile @@ -2,7 +2,7 @@ # # Makefile for pytest # -# Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group +# Portions Copyright (c) 1996-2026, PostgreSQL Global Development Group # Portions Copyright (c) 1994, Regents of the University of California # # src/test/pytest/Makefile diff --git a/src/test/pytest/libpq/result.py b/src/test/pytest/libpq/result.py index 7fdb725a96da2..c1e75578e189b 100644 --- a/src/test/pytest/libpq/result.py +++ b/src/test/pytest/libpq/result.py @@ -45,10 +45,20 @@ def __init__(self, lib: ctypes.CDLL, res: _PGresult_p): # type: ignore[valid-ty self._res: Optional[_PGresult_p] = res # type: ignore[valid-type] def __exit__(self, *exc): - self._lib.PQclear(self._res) - self._res = None + self.close() + + def close(self): + """Free the underlying PGresult; idempotent.""" + if self._res is not None: + self._lib.PQclear(self._res) + self._res = None def status(self) -> ExecStatus: + # A NULL PGresult (PQexec on OOM / lost connection) has no status; report + # FATAL_ERROR so callers route it to raise_error() instead of mistaking + # PQresultStatus(NULL)==0 for an empty query and silently returning None. 
+ if not self._res: + return ExecStatus.PGRES_FATAL_ERROR return ExecStatus(self._lib.PQresultStatus(self._res)) def error_message(self): diff --git a/src/test/pytest/libpq/session.py b/src/test/pytest/libpq/session.py index acb744b08971a..acaea63538d11 100644 --- a/src/test/pytest/libpq/session.py +++ b/src/test/pytest/libpq/session.py @@ -10,7 +10,7 @@ import contextlib import ctypes -from typing import Any, Callable, Dict +from typing import Any, Callable, Dict, Optional from .constants import ConnectionStatus, ExecStatus, _PGconn_p from .errors import LibpqError @@ -32,6 +32,9 @@ def __init__( # Cleared to None on __exit__ once the connection has been finished. self._handle = handle self._stack = stack + # The most recent result, cleared when the next exec() runs so a long + # polling loop on one connection does not accumulate unfreed PGresults. + self._last_result: Optional[PGresult] = None def __exit__(self, *exc): self._lib.PQfinish(self._handle) @@ -40,9 +43,18 @@ def __exit__(self, *exc): def exec(self, query: str) -> PGresult: """ Executes a query via PQexec() and returns a PGresult. + + The previous result from this connection is cleared first, so issuing + many queries on one connection (e.g. a poll loop) frees each PGresult + promptly rather than deferring all of them to end-of-test cleanup. 
""" + if self._last_result is not None: + self._last_result.close() + self._last_result = None res = self._lib.PQexec(self._handle, query.encode()) - return self._stack.enter_context(PGresult(self._lib, res)) + result = self._stack.enter_context(PGresult(self._lib, res)) + self._last_result = result + return result def sql(self, query: str): # pylint: disable=inconsistent-return-statements """ @@ -161,7 +173,8 @@ def connect( # Check connection status before adding to stack if libpq_handle.PQstatus(conn_p) != ConnectionStatus.CONNECTION_OK: - error_msg = libpq_handle.PQerrorMessage(conn_p).decode() + msg = libpq_handle.PQerrorMessage(conn_p) + error_msg = msg.decode() if msg else "connection failed (out of memory?)" # Manually close the failed connection libpq_handle.PQfinish(conn_p) raise LibpqError(error_msg) diff --git a/src/test/pytest/pypg/_env.py b/src/test/pytest/pypg/_env.py index 02e91a686ef70..1c5841263ea9a 100644 --- a/src/test/pytest/pypg/_env.py +++ b/src/test/pytest/pypg/_env.py @@ -34,7 +34,7 @@ def test_some_ldap_feature(): To skip an entire module: - pytestmark = pypg.require_test_extra("ssl", "kerberos") + pytestmark = pypg.require_test_extras("ssl", "kerberos") """ return pytest.mark.skipif( not all(_has_test_extra(k) for k in keys), diff --git a/src/test/pytest/pypg/bgpsql.py b/src/test/pytest/pypg/bgpsql.py index bb1a17ba43cf6..d485d9494fe20 100644 --- a/src/test/pytest/pypg/bgpsql.py +++ b/src/test/pytest/pypg/bgpsql.py @@ -123,15 +123,16 @@ def _pump_until(self, want_out=None, want_err=None): interval = 0.0005 while True: with self._lock: - ok_out = rx_out is None or rx_out.search(self._stdout) - ok_err = rx_err is None or rx_err.search(self._stderr) + stdout, stderr = self._stdout, self._stderr + ok_out = rx_out is None or rx_out.search(stdout) + ok_err = rx_err is None or rx_err.search(stderr) if ok_out and ok_err: return if time.monotonic() > deadline: raise TimeoutError( "background psql timed out waiting for " "out={!r} 
err={!r}\nstdout:\n{}\nstderr:\n{}".format( - want_out, want_err, self._stdout, self._stderr + want_out, want_err, stdout, stderr ) ) time.sleep(interval) @@ -165,13 +166,14 @@ def query(self, query): return output def query_safe(self, query): - """Run query and return its output, raising if psql reported an error. + """Run query and return its output, raising if psql wrote any stderr. - Mirrors PostgreSQL::Test::BackgroundPsql->query_safe: any ERROR/FATAL/ - PANIC on stderr from the statement is fatal to the test. + Mirrors PostgreSQL::Test::BackgroundPsql->query_safe, which dies on any + non-empty stderr from the statement (so a WARNING/NOTICE is fatal too, + not only ERROR/FATAL/PANIC). """ output = self.query(query) - if re.search(r"^(?:ERROR|FATAL|PANIC):", self._last_stderr, re.MULTILINE): + if self._last_stderr != "": raise RuntimeError( "query_safe failed: {}\nquery was: {}".format( self._last_stderr.strip(), query diff --git a/src/test/pytest/pypg/command.py b/src/test/pytest/pypg/command.py index 1cff64050c8aa..620ae946e9934 100644 --- a/src/test/pytest/pypg/command.py +++ b/src/test/pytest/pypg/command.py @@ -15,6 +15,7 @@ from dataclasses import dataclass from typing import Dict, List, Optional, Sequence +from ._env import test_timeout_default from .util import run_captured @@ -152,7 +153,7 @@ def _env(self, extra_env: Optional[Dict[str, str]]) -> Dict[str, str]: env.update(extra_env) return env - def result(self, cmd: Sequence, *, extra_env=None) -> CommandResult: + def result(self, cmd: Sequence, *, extra_env=None, timeout=None) -> CommandResult: """Run cmd, capturing output. Never raises on a nonzero exit. Output is captured through temporary files rather than subprocess pipes @@ -160,9 +161,17 @@ def result(self, cmd: Sequence, *, extra_env=None) -> CommandResult: pg_basebackup or pg_ctl start -- leaves a postmaster holding the pipe's write end open, which would deadlock a pipe read to EOF. 
+ A timeout (defaulting to PG_TEST_TIMEOUT_DEFAULT) bounds the run so a + hung client program fails fast instead of stalling the whole test, the + same way server.psql/safe_psql and bgpsql are bounded. + Co-authored-by: Andrew Dunstan """ - returncode, stdout, stderr = run_captured(_argv(cmd), env=self._env(extra_env)) + if timeout is None: + timeout = test_timeout_default() + returncode, stdout, stderr = run_captured( + _argv(cmd), env=self._env(extra_env), timeout=timeout + ) return ProgramResult(returncode, stdout, stderr) def popen(self, cmd: Sequence, *, extra_env=None) -> subprocess.Popen: @@ -320,7 +329,9 @@ def check_pg_config(self, regexp): Mirrors PostgreSQL::Test::Utils::check_pg_config (the pattern is anchored at the start of the line). """ - includedir = self.result(["pg_config", "--includedir"]).stdout.strip() + result = self.result(["pg_config", "--includedir"]) + assert result.ok, "pg_config --includedir failed:\n{}".format(result) + includedir = result.stdout.strip() header = os.path.join(includedir, "pg_config.h") with open(header, encoding="utf-8", errors="replace") as f: return any(re.match(regexp, line) for line in f) From d2ec9a79c7e25f35c46487a2c7d19f916c6b1b5a Mon Sep 17 00:00:00 2001 From: Greg Burd Date: Mon, 15 Jun 2026 14:45:12 -0400 Subject: [PATCH 35/36] pytest: make configure match configure.ac (drop the uv install step) The generated configure carried a hand-added "uv pip install $srcdir" / PYTEST="uv run pytest" branch that configure.ac never had, so the two were out of sync (the next autoreconf would silently revert it) and the step mutated the user's active Python environment during configure -- surprising and non- reproducible. Replace it with the "python -m pytest" fallback that configure.ac actually defines, and drop the now-unused UV substitution variable, so configure and configure.ac agree. uv stays a dev-only convenience, not part of the build's pytest detection. 
--- configure | 75 ++++++------------------------------------------------- 1 file changed, 8 insertions(+), 67 deletions(-) diff --git a/configure b/configure index dd53ac60acfaf..8e0a54e1cb94b 100755 --- a/configure +++ b/configure @@ -630,7 +630,6 @@ vpath_build PG_SYSROOT PG_VERSION_NUM LDFLAGS_EX_BE -UV PYTEST PROVE DBTOEPUB @@ -19612,74 +19611,16 @@ $as_echo "$PYTEST" >&6; } fi if test -z "$PYTEST"; then - # If pytest not found, try installing with uv - if test -z "$UV"; then - for ac_prog in uv -do - # Extract the first word of "$ac_prog", so it can be a program name with args. -set dummy $ac_prog; ac_word=$2 -{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 -$as_echo_n "checking for $ac_word... " >&6; } -if ${ac_cv_path_UV+:} false; then : - $as_echo_n "(cached) " >&6 -else - case $UV in - [\\/]* | ?:[\\/]*) - ac_cv_path_UV="$UV" # Let the user override the test with a path. - ;; - *) - as_save_IFS=$IFS; IFS=$PATH_SEPARATOR -for as_dir in $PATH -do - IFS=$as_save_IFS - test -z "$as_dir" && as_dir=. - for ac_exec_ext in '' $ac_executable_extensions; do - if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then - ac_cv_path_UV="$as_dir/$ac_word$ac_exec_ext" - $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 - break 2 - fi -done - done -IFS=$as_save_IFS - - ;; -esac -fi -UV=$ac_cv_path_UV -if test -n "$UV"; then - { $as_echo "$as_me:${as_lineno-$LINENO}: result: $UV" >&5 -$as_echo "$UV" >&6; } -else - { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 -$as_echo "no" >&6; } -fi - - - test -n "$UV" && break -done - -else - # Report the value of UV in configure's output in all cases. - { $as_echo "$as_me:${as_lineno-$LINENO}: checking for UV" >&5 -$as_echo_n "checking for UV... 
" >&6; } - { $as_echo "$as_me:${as_lineno-$LINENO}: result: $UV" >&5 -$as_echo "$UV" >&6; } -fi - - if test -n "$UV"; then - { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether uv can install pytest dependencies" >&5 -$as_echo_n "checking whether uv can install pytest dependencies... " >&6; } - if "$UV" pip install "$srcdir" >&5 2>&1; then - { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5 + # Try python -m pytest as a fallback + { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether python -m pytest works" >&5 +$as_echo_n "checking whether python -m pytest works... " >&6; } + if "$PYTHON" -m pytest --version >&5 2>&1; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5 $as_echo "yes" >&6; } - PYTEST="$UV run pytest" - else - { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 -$as_echo "no" >&6; } - as_fn_error $? "pytest not found and uv failed to install dependencies" "$LINENO" 5 - fi + PYTEST="$PYTHON -m pytest" else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } as_fn_error $? "pytest not found" "$LINENO" 5 fi fi From 2390d137798cfb83b4d24b6e9266d3093afee9ba Mon Sep 17 00:00:00 2001 From: Greg Burd Date: Mon, 15 Jun 2026 15:41:07 -0400 Subject: [PATCH 36/36] pytest: fix correctness bugs and fidelity gaps from the second review pass Real bugs: - psql/001: use current_log_position() (character offset) instead of log.stat().st_size (raw byte size) for the wait_for_log offset, matching the framework's CRLF-safe convention. - pg_waldump/002: unpack the page LSN with native byte order ("=II"), not little-endian -- pd_lsn is stored in host byte order (Perl uses unpack('LL')). - pgbench/001: the concurrent GRANT/VACUUM block is a tolerated PROC_IN_VACUUM TODO; warn on its failure instead of a bare except-pass that hid every other regression in that path. Resource leaks: - pg_rewind/007 and pg_verifybackup/005: use the auto-cleaned tmp_path fixture instead of an unremoved tempfile.mkdtemp(). 
pg_verifybackup/005 also drops the needless full-server start in favor of the lighter pg_bin fixture and pathlib. - pg_basebackup/020: set the restrictive umask via a save/restore fixture so it does not leak into later tests in the same process. Fidelity gaps: - postgres_fdw/010: pass the publisher node and subscription to wait_for_subscription_sync so the publisher-catchup step is not skipped. - pg_basebackup/030: port the output-file permission-derivation checks (0600 vs 0640, Windows-guarded) and the final --drop-slot from the Perl original. - pg_dump/006: drop unmatched glob patterns (Perl glob() semantics) instead of passing the literal "*" pattern to the compression program. - pg_dump/010: read PG_REGRESS defensively (skip if unset) and drop the unused "plain" parameter from _restore_full_dump. --- .../postgres_fdw/pyt/test_010_subscription.py | 2 +- .../pyt/test_020_pg_receivewal.py | 18 +++++- .../pyt/test_030_pg_recvlogical.py | 60 +++++++++++++++++++ .../pg_dump/pyt/test_006_pg_dump_compress.py | 11 +++- src/bin/pg_dump/pyt/test_010_dump_connstr.py | 16 ++--- .../pg_rewind/pyt/test_007_standby_source.py | 5 +- .../pyt/test_005_bad_manifest.py | 11 ++-- .../pg_waldump/pyt/test_002_save_fullpage.py | 5 +- .../pyt/test_001_pgbench_with_server.py | 13 +++- src/bin/psql/pyt/test_001_basic.py | 2 +- 10 files changed, 117 insertions(+), 26 deletions(-) diff --git a/contrib/postgres_fdw/pyt/test_010_subscription.py b/contrib/postgres_fdw/pyt/test_010_subscription.py index 511b46195eee5..584ce74c22c70 100644 --- a/contrib/postgres_fdw/pyt/test_010_subscription.py +++ b/contrib/postgres_fdw/pyt/test_010_subscription.py @@ -34,7 +34,7 @@ def test_010_subscription(create_pg): node_subscriber.safe_psql( "CREATE SUBSCRIPTION tap_sub SERVER tap_server PUBLICATION tap_pub WITH (password_required=false)" ) - node_subscriber.wait_for_subscription_sync() + node_subscriber.wait_for_subscription_sync(node_publisher, "tap_sub") result = node_subscriber.safe_psql("SELECT MAX(a) 
FROM tab_ins") assert result == "1002", "check that initial data was copied to subscriber" node_publisher.safe_psql( diff --git a/src/bin/pg_basebackup/pyt/test_020_pg_receivewal.py b/src/bin/pg_basebackup/pyt/test_020_pg_receivewal.py index f1528b8b39d28..8231fc5632268 100644 --- a/src/bin/pg_basebackup/pyt/test_020_pg_receivewal.py +++ b/src/bin/pg_basebackup/pyt/test_020_pg_receivewal.py @@ -13,6 +13,8 @@ import os import re +import pytest + import pypg @@ -22,9 +24,23 @@ def _glob1(pattern, msg): return matches[0] +@pytest.fixture(autouse=True) +def _restrictive_umask(): + """Run this test under a 0o077 umask, restoring the previous value after. + + The Perl original sets umask 0077 so pg_receivewal's output files have + predictable permissions. pytest runs many tests in one process, so the + umask must be restored or it leaks into later tests' permission checks. + """ + prev_mask = os.umask(0o077) + try: + yield + finally: + os.umask(prev_mask) + + def test_020_pg_receivewal(create_pg, pg_bin): """pg_receivewal usage, slots, compression, restart_lsn, and timeline jump.""" - os.umask(0o077) pg_bin.program_help_ok("pg_receivewal") pg_bin.program_version_ok("pg_receivewal") pg_bin.program_options_handling_ok("pg_receivewal") diff --git a/src/bin/pg_basebackup/pyt/test_030_pg_recvlogical.py b/src/bin/pg_basebackup/pyt/test_030_pg_recvlogical.py index f9d39fdddac92..63a0d254207c3 100644 --- a/src/bin/pg_basebackup/pyt/test_030_pg_recvlogical.py +++ b/src/bin/pg_basebackup/pyt/test_030_pg_recvlogical.py @@ -8,7 +8,10 @@ resumes, ultimately writing both committed INSERTs to its output file. """ +import os +import platform import signal +import stat import pypg @@ -200,3 +203,60 @@ def _reconnect_test(node): outfiledata = pypg.slurp_file(outfile) count = outfiledata.count("INSERT") assert count == 2, "pg_recvlogical has received and written two INSERTs" + + # pg_recvlogical derives output-file permissions from the source cluster. 
+ # Unix-style permissions are not supported on Windows (cf. the Perl SKIP). + if platform.system() != "Windows": + # The cluster was initialized without group access, so the output file + # should be 0600. + mode = stat.S_IMODE(os.stat(outfile).st_mode) + assert mode == 0o600, ( + "pg_recvlogical output file has no group permissions (0600), " + "got {:04o}".format(mode) + ) + + # Enable group access on the source cluster and restart so + # pg_recvlogical observes the updated source-cluster permissions. + node.stop() + pypg.chmod_recursive(node.datadir, 0o750, 0o640) + node.start() + + group_outfile = "{}/group_access.out".format(node.basedir) + group_cmd = [ + "pg_recvlogical", + "--slot", + "reconnect_test", + "--dbname", + _cs(node), + "--start", + "--file", + group_outfile, + "--fsync-interval", + "1", + ] + group_recv = node.bin.popen(group_cmd) + try: + node.safe_psql("INSERT INTO test_table VALUES (3)") + pypg.wait_for_file(group_outfile, r"INSERT") + finally: + group_recv.send_signal(signal.SIGTERM) + group_recv.wait() + + # With group access on the source cluster, the output file is 0640. 
+ mode = stat.S_IMODE(os.stat(group_outfile).st_mode) + assert mode == 0o640, ( + "pg_recvlogical output file respects group permissions (0640), " + "got {:04o}".format(mode) + ) + + node.command_ok( + [ + "pg_recvlogical", + "--slot", + "reconnect_test", + "--dbname", + _cs(node), + "--drop-slot", + ], + "reconnect_test slot dropped", + ) diff --git a/src/bin/pg_dump/pyt/test_006_pg_dump_compress.py b/src/bin/pg_dump/pyt/test_006_pg_dump_compress.py index 0268b88fc3a03..4206c37703dcc 100644 --- a/src/bin/pg_dump/pyt/test_006_pg_dump_compress.py +++ b/src/bin/pg_dump/pyt/test_006_pg_dump_compress.py @@ -310,8 +310,15 @@ def _run_compress_cmd(node, run, spec): return False full = [program] for arg in compress["args"]: - matches = globmod.glob(arg) - full += matches if matches else [arg] + if globmod.has_magic(arg): + # A glob pattern: include its matches, and drop it entirely when it + # matches nothing (Perl's glob() returns an empty list, so unmatched + # patterns like blobs_*.toc are silently dropped -- never passed + # through literally with the '*' intact). + full += globmod.glob(arg) + else: + # A flag or concrete path: always pass it through unchanged. + full.append(arg) result = subprocess.run(full, capture_output=True, check=False) assert result.returncode == 0, "{}: compression commands\n{}".format( run, result.stderr.decode("utf-8", "replace") diff --git a/src/bin/pg_dump/pyt/test_010_dump_connstr.py b/src/bin/pg_dump/pyt/test_010_dump_connstr.py index dd8dcfd61d984..7addd9ae4e7a0 100644 --- a/src/bin/pg_dump/pyt/test_010_dump_connstr.py +++ b/src/bin/pg_dump/pyt/test_010_dump_connstr.py @@ -12,6 +12,8 @@ import os import subprocess +import pytest + # Source/destination bootstrap superusers (plain ASCII). 
_SRC_SUPER = "regress_postgres" _DST_SUPER = "boot" @@ -71,7 +73,9 @@ def _latin1_env(node, **extra): def _config_auth_roles(node, super_user, roles): """Run pg_regress --config-auth creating the given roles (bytes-safe).""" - pg_regress = os.environ["PG_REGRESS"] + pg_regress = os.environ.get("PG_REGRESS") + if not pg_regress: + pytest.skip("PG_REGRESS not set") roles_arg = b",".join(roles) subprocess.run( [ @@ -119,7 +123,7 @@ def _dumpall_roles_only(node, dbname, username, no_sync, msg, discard): ) -def _restore_full_dump(create_pg, name, plain, restore_super): +def _restore_full_dump(create_pg, name, restore_super): """Init a fresh LATIN1 node, create the restore super, return the node.""" node = create_pg( name, @@ -279,9 +283,7 @@ def test_010_dump_connstr(create_pg, pg_bin): restore_super = "regress_a'b\\c=d\\ne\"f" # Restore through psql using environment variables for connection params. - envar_node = _restore_full_dump( - create_pg, "destination_envar", plain, restore_super - ) + envar_node = _restore_full_dump(create_pg, "destination_envar", restore_super) result = pg_bin.result( ["psql", "--no-psqlrc", "--file", plain], extra_env=_envar_restore_env(envar_node, restore_super), @@ -292,9 +294,7 @@ def test_010_dump_connstr(create_pg, pg_bin): assert result.stderr == "", "no dump errors" # Restore through psql using command-line connection params. 
- cmdline_node = _restore_full_dump( - create_pg, "destination_cmdline", plain, restore_super - ) + cmdline_node = _restore_full_dump(create_pg, "destination_cmdline", restore_super) result = pg_bin.result( [ "psql", diff --git a/src/bin/pg_rewind/pyt/test_007_standby_source.py b/src/bin/pg_rewind/pyt/test_007_standby_source.py index 2808044c7821a..21e7a6bc55832 100644 --- a/src/bin/pg_rewind/pyt/test_007_standby_source.py +++ b/src/bin/pg_rewind/pyt/test_007_standby_source.py @@ -10,12 +10,11 @@ import os import shutil -import tempfile -def test_007_standby_source(rewind_test, create_pg): +def test_007_standby_source(rewind_test, create_pg, tmp_path): """pg_rewind uses a (cascading) standby as its source server.""" - tmp_folder = tempfile.mkdtemp(prefix="standbysrc_") + tmp_folder = str(tmp_path) rewind_test.setup_cluster("a") rewind_test.start_primary() node_a = rewind_test.primary diff --git a/src/bin/pg_verifybackup/pyt/test_005_bad_manifest.py b/src/bin/pg_verifybackup/pyt/test_005_bad_manifest.py index d22ee94cc91bf..809d4e4496de5 100644 --- a/src/bin/pg_verifybackup/pyt/test_005_bad_manifest.py +++ b/src/bin/pg_verifybackup/pyt/test_005_bad_manifest.py @@ -11,7 +11,6 @@ """ import re -import tempfile # (kind, description, manifest_contents). 
kind: 'parse' -> "could not parse @@ -159,17 +158,15 @@ ] -def test_005_bad_manifest(create_pg): +def test_005_bad_manifest(pg_bin, tmp_path): """pg_verifybackup reports the right diagnostic for each malformed manifest.""" - primary = create_pg("primary") - tempdir = tempfile.mkdtemp(prefix="badmf_") + manifest = tmp_path / "backup_manifest" for kind, desc, contents in _CASES: - with open("{}/backup_manifest".format(tempdir), "w", encoding="utf-8") as fh: - fh.write(contents) + manifest.write_text(contents, encoding="utf-8") if kind == "parse": pattern = r"could not parse backup manifest: " + re.escape(desc) elif kind == "fatal": pattern = r"error: " + re.escape(desc) else: pattern = desc - primary.command_fails_like(["pg_verifybackup", tempdir], pattern, desc) + pg_bin.command_fails_like(["pg_verifybackup", str(tmp_path)], pattern, desc) diff --git a/src/bin/pg_waldump/pyt/test_002_save_fullpage.py b/src/bin/pg_waldump/pyt/test_002_save_fullpage.py index 31a54bc881f77..bda50bc7d341f 100644 --- a/src/bin/pg_waldump/pyt/test_002_save_fullpage.py +++ b/src/bin/pg_waldump/pyt/test_002_save_fullpage.py @@ -24,7 +24,10 @@ def _get_block_lsn(path, blocksize): with open(path, "rb") as fh: block = fh.read(blocksize) assert len(block) == blocksize, "could not read block" - lsn_hi, lsn_lo = struct.unpack("