diff --git a/contrib/pg_buffercache/Makefile b/contrib/pg_buffercache/Makefile index 0e618f66aec6e..566cc91118ba4 100644 --- a/contrib/pg_buffercache/Makefile +++ b/contrib/pg_buffercache/Makefile @@ -5,6 +5,9 @@ OBJS = \ $(WIN32RES) \ pg_buffercache_pages.o +EXTRA_INSTALL=src/test/modules/injection_points \ + contrib/test_decoding + EXTENSION = pg_buffercache DATA = pg_buffercache--1.2.sql pg_buffercache--1.2--1.3.sql \ pg_buffercache--1.1--1.2.sql pg_buffercache--1.0--1.1.sql \ @@ -13,6 +16,7 @@ DATA = pg_buffercache--1.2.sql pg_buffercache--1.2--1.3.sql \ PGFILEDESC = "pg_buffercache - monitoring of shared buffer cache in real-time" REGRESS = pg_buffercache pg_buffercache_numa +TAP_TESTS = 1 ifdef USE_PGXS PG_CONFIG = pg_config diff --git a/contrib/pg_buffercache/expected/pg_buffercache.out b/contrib/pg_buffercache/expected/pg_buffercache.out index 8cdad3a7117b0..513953bbd0657 100644 --- a/contrib/pg_buffercache/expected/pg_buffercache.out +++ b/contrib/pg_buffercache/expected/pg_buffercache.out @@ -1,5 +1,5 @@ CREATE EXTENSION pg_buffercache; -select pg_size_bytes(setting)/(select setting::bigint from pg_settings where name = 'block_size') AS nbuffers +select setting::bigint AS nbuffers from pg_settings where name = 'shared_buffers' \gset diff --git a/contrib/pg_buffercache/meson.build b/contrib/pg_buffercache/meson.build index e681205abb2d8..019ecf091afa7 100644 --- a/contrib/pg_buffercache/meson.build +++ b/contrib/pg_buffercache/meson.build @@ -38,5 +38,13 @@ tests += { 'pg_buffercache', 'pg_buffercache_numa', ], - }, + }, + 'tap': { + 'env': { + 'enable_injection_points': get_option('injection_points') ? 'yes' : 'no', + }, + 'tests': [ + 't/001_basic.pl', + ], + } } diff --git a/contrib/pg_buffercache/pg_buffercache_pages.c b/contrib/pg_buffercache/pg_buffercache_pages.c index 8a17319ff2a0a..745d07f836875 100644 --- a/contrib/pg_buffercache/pg_buffercache_pages.c +++ b/contrib/pg_buffercache/pg_buffercache_pages.c @@ -17,6 +17,7 @@ #include "storage/bufmgr.h" #include "utils/rel.h" #include "utils/tuplestore.h" +#include "utils/injection_point.h" #define NUM_BUFFERCACHE_PAGES_MIN_ELEM 8 @@ -199,6 +200,11 @@ pg_buffercache_pages(PG_FUNCTION_ARGS) * snapshot across all buffers, but we do grab the buffer header * locks, so the information of each buffer is self-consistent. */ + + /* + * This point fails when lock bufHdr fails later because of invalid buffer after resize. + */ + INJECTION_POINT("pg-buffercache-scan-start", NULL); for (i = 0; i < currentNBuffers; i++) { BufferDesc *bufHdr; @@ -212,17 +218,27 @@ pg_buffercache_pages(PG_FUNCTION_ARGS) * happen if only we setup the descriptor array large enough at * the server startup time. */ - if (currentNBuffers != pg_atomic_read_u32(&ShmemCtrl->currentNBuffers)) - ereport(ERROR, - (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE), - errmsg("number of shared buffers changed during scan of buffer cache"))); + // if (currentNBuffers != pg_atomic_read_u32(&ShmemCtrl->currentNBuffers)) + // ereport(ERROR, + // (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE), + // errmsg("number of shared buffers changed during scan of buffer cache"))); + elog(DEBUG1, "scanning buffer %d", i); + bufHdr = GetBufferDescriptor(i); - /* Lock each buffer header before inspecting. */ - buf_state = LockBufHdr(bufHdr); - + + /* Injection point during scan to test resize interaction during buffer resize and accessing invalid buffers after resize in case of shrinking */ + if(i==currentNBuffers/2) + INJECTION_POINT("pg-buffercache-after-getdesc", NULL); + /* + * Point of failure is when invalid buffer is accessed after resize. + * All the places where bufHdr is being called. + * One injection point before locking buffer descriptor helps covers all the later cases. + */ + buf_state = LockBufHdr(bufHdr); + elog(DEBUG1, "got buffer descriptor for buffer %d", i); fctx->record[i].bufferid = BufferDescriptorGetBuffer(bufHdr); - fctx->record[i].relfilenumber = BufTagGetRelNumber(&bufHdr->tag); + fctx->record[i].relfilenumber = BufTagGetRelNumber(&bufHdr->tag); fctx->record[i].reltablespace = bufHdr->tag.spcOid; fctx->record[i].reldatabase = bufHdr->tag.dbOid; fctx->record[i].forknum = BufTagGetForkNum(&bufHdr->tag); @@ -350,6 +366,7 @@ pg_buffercache_os_pages_internal(FunctionCallInfo fcinfo, bool include_numa) int max_entries; char *startptr, *endptr; + int currentNBuffers = pg_atomic_read_u32(&ShmemCtrl->currentNBuffers); /* If NUMA information is requested, initialize NUMA support. */ if (include_numa && pg_numa_init() == -1) @@ -392,7 +409,7 @@ pg_buffercache_os_pages_internal(FunctionCallInfo fcinfo, bool include_numa) startptr = (char *) TYPEALIGN_DOWN(os_page_size, BufferGetBlock(1)); endptr = (char *) TYPEALIGN(os_page_size, - (char *) BufferGetBlock(NBuffers) + BLCKSZ); + (char *) BufferGetBlock(currentNBuffers) + BLCKSZ); os_page_count = (endptr - startptr) / os_page_size; /* Used to determine the NUMA node for all OS pages at once */ @@ -418,7 +435,7 @@ pg_buffercache_os_pages_internal(FunctionCallInfo fcinfo, bool include_numa) Assert(idx == os_page_count); elog(DEBUG1, "NUMA: NBuffers=%d os_page_count=" UINT64_FORMAT " " - "os_page_size=%zu", NBuffers, os_page_count, os_page_size); + "os_page_size=%zu", currentNBuffers, os_page_count, os_page_size); /* * If we ever get 0xff back from kernel inquiry, then we probably @@ -467,7 +484,7 @@ pg_buffercache_os_pages_internal(FunctionCallInfo fcinfo, bool include_numa) * without reallocating memory. */ pages_per_buffer = Max(1, BLCKSZ / os_page_size) + 1; - max_entries = NBuffers * pages_per_buffer; + max_entries = currentNBuffers * pages_per_buffer; /* Allocate entries for BufferCacheOsPagesRec records. */ fctx->record = (BufferCacheOsPagesRec *) @@ -490,7 +507,7 @@ pg_buffercache_os_pages_internal(FunctionCallInfo fcinfo, bool include_numa) */ startptr = (char *) TYPEALIGN_DOWN(os_page_size, (char *) BufferGetBlock(1)); idx = 0; - for (i = 0; i < NBuffers; i++) + for (i = 0; i < currentNBuffers; i++) { char *buffptr = (char *) BufferGetBlock(i + 1); BufferDesc *bufHdr; @@ -501,6 +518,11 @@ pg_buffercache_os_pages_internal(FunctionCallInfo fcinfo, bool include_numa) CHECK_FOR_INTERRUPTS(); + if (currentNBuffers != pg_atomic_read_u32(&ShmemCtrl->currentNBuffers)) + ereport(ERROR, + (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE), + errmsg("number of shared buffers changed during scan of buffer cache"))); + bufHdr = GetBufferDescriptor(i); /* Lock each buffer header before inspecting. */ @@ -632,17 +654,23 @@ pg_buffercache_summary(PG_FUNCTION_ARGS) int32 buffers_dirty = 0; int32 buffers_pinned = 0; int64 usagecount_total = 0; + int currentNBuffers = pg_atomic_read_u32(&ShmemCtrl->currentNBuffers); if (get_call_result_type(fcinfo, NULL, &tupledesc) != TYPEFUNC_COMPOSITE) elog(ERROR, "return type must be a row type"); - for (int i = 0; i < NBuffers; i++) + for (int i = 0; i < currentNBuffers; i++) { BufferDesc *bufHdr; uint64 buf_state; CHECK_FOR_INTERRUPTS(); + if (currentNBuffers != pg_atomic_read_u32(&ShmemCtrl->currentNBuffers)) + ereport(ERROR, + (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE), + errmsg("number of shared buffers changed during scan of buffer cache"))); + /* * This function summarizes the state of all headers. Locking the * buffer headers wouldn't provide an improved result as the state of @@ -694,10 +722,11 @@ pg_buffercache_usage_counts(PG_FUNCTION_ARGS) int pinned[BM_MAX_USAGE_COUNT + 1] = {0}; Datum values[NUM_BUFFERCACHE_USAGE_COUNTS_ELEM]; bool nulls[NUM_BUFFERCACHE_USAGE_COUNTS_ELEM] = {0}; + int currentNBuffers = pg_atomic_read_u32(&ShmemCtrl->currentNBuffers); InitMaterializedSRF(fcinfo, 0); - for (int i = 0; i < NBuffers; i++) + for (int i = 0; i < currentNBuffers; i++) { BufferDesc *bufHdr = GetBufferDescriptor(i); uint64 buf_state = pg_atomic_read_u64(&bufHdr->state); @@ -705,6 +734,11 @@ pg_buffercache_usage_counts(PG_FUNCTION_ARGS) CHECK_FOR_INTERRUPTS(); + if (currentNBuffers != pg_atomic_read_u32(&ShmemCtrl->currentNBuffers)) + ereport(ERROR, + (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE), + errmsg("number of shared buffers changed during scan of buffer cache"))); + usage_count = BUF_STATE_GET_USAGECOUNT(buf_state); usage_counts[usage_count]++; @@ -755,13 +789,15 @@ pg_buffercache_evict(PG_FUNCTION_ARGS) Buffer buf = PG_GETARG_INT32(0); bool buffer_flushed; + int currentNBuffers = pg_atomic_read_u32(&ShmemCtrl->currentNBuffers); + if (get_call_result_type(fcinfo, NULL, &tupledesc) != TYPEFUNC_COMPOSITE) elog(ERROR, "return type must be a row type"); pg_buffercache_superuser_check("pg_buffercache_evict"); - if (buf < 1 || buf > NBuffers) + if (buf < 1 || buf > currentNBuffers) elog(ERROR, "bad buffer ID: %d", buf); values[0] = BoolGetDatum(EvictUnpinnedBuffer(buf, &buffer_flushed)); @@ -987,4 +1023,4 @@ pg_buffercache_lookup_table_entries(PG_FUNCTION_ARGS) BufTableGetContents(rsinfo->setResult, rsinfo->setDesc); return (Datum) 0; -} +} \ No newline at end of file diff --git a/contrib/pg_buffercache/sql/pg_buffercache.sql b/contrib/pg_buffercache/sql/pg_buffercache.sql index 949c3f8d000df..b202deaf2aded 100644 --- a/contrib/pg_buffercache/sql/pg_buffercache.sql +++ b/contrib/pg_buffercache/sql/pg_buffercache.sql @@ -1,6 +1,6 @@ CREATE EXTENSION pg_buffercache; -select pg_size_bytes(setting)/(select setting::bigint from pg_settings where name = 'block_size') AS nbuffers +select setting::bigint AS nbuffers from pg_settings where name = 'shared_buffers' \gset diff --git a/contrib/pg_buffercache/t/001_basic.pl b/contrib/pg_buffercache/t/001_basic.pl new file mode 100644 index 0000000000000..d874b73d1268f --- /dev/null +++ b/contrib/pg_buffercache/t/001_basic.pl @@ -0,0 +1,244 @@ +# Copyright (c) 2025-2025, PostgreSQL Global Development Group +# +# Test shared_buffer resizing coordination with client connections joining using injection points + +use strict; +use warnings; +use IPC::Run; +use PostgreSQL::Test::Cluster; +use PostgreSQL::Test::Utils; +use Test::More; +use Time::HiRes qw(sleep); + +# Function to calculate expected buffer count from size string +sub calculate_buffer_count +{ + my ($size_string, $block_size) = @_; + + # Parse size and convert to bytes + my ($size_val, $unit) = ($size_string =~ /(\d+)(\w+)/); + my $size_bytes; + if (lc($unit) eq 'kb') { + $size_bytes = $size_val * 1024; + } elsif (lc($unit) eq 'mb') { + $size_bytes = $size_val * 1024 * 1024; + } elsif (lc($unit) eq 'gb') { + $size_bytes = $size_val * 1024 * 1024 * 1024; + } else { + # Default to kB if unit is not recognized + $size_bytes = $size_val * 1024; + } + + return int($size_bytes / $block_size); +} + +# Initialize cluster with very small buffer sizes for testing +my $node = PostgreSQL::Test::Cluster->new('main'); +my $shared_buffers_initial = '128MB'; +$node->init; + +# Configure for buffer resizing with very small buffer pool sizes for faster tests. +$node->append_conf('postgresql.conf', 'shared_preload_libraries = injection_points'); +$node->append_conf('postgresql.conf', qq{ +max_shared_buffers = '128MB' +shared_buffers = $shared_buffers_initial +max_parallel_workers_per_gather = 0 +restart_after_crash = off +log_min_messages = debug1 +}); + +$node->start; + +# Enable injection points +$node->safe_psql('postgres', "CREATE EXTENSION injection_points"); + +# Get the block size (this is fixed for the binary) +my $block_size = $node->safe_psql('postgres', "SHOW block_size"); + +# Try to create pg_buffercache extension for buffer analysis +eval { + $node->safe_psql('postgres', "CREATE EXTENSION pg_buffercache"); +}; +if ($@) { + $node->stop; + plan skip_all => 'pg_buffercache extension not available - cannot verify buffer usage'; +} + +# Create dedicated sessions for injection point handling and test queries, +# so that we don't create new backends for test operations after starting +# resize operation. Only one backend, which tests new backend synchronization +# with resizing operation, should start after resizing has commenced. +my $injection_session = $node->background_psql('postgres'); +my $query_session = $node->background_psql('postgres'); +my $resize_session = $node->background_psql('postgres'); + +# Function to run a single injection point test +sub run_injection_point_test +{ + my ($test_name, $injection_point, $target_size, $operation_type) = @_; + + note("Test with $test_name ($operation_type)"); + + # Update buffer pool size and wait for it to reflect pending state + $resize_session->query_safe("ALTER SYSTEM SET shared_buffers = '$target_size'"); + $resize_session->query_safe("SELECT pg_reload_conf()"); + my $pending_size_str = "pending: $target_size"; + $resize_session->poll_query_until("SELECT substring(current_setting('shared_buffers'), '$pending_size_str')", $pending_size_str); + + # Set up injection point in injection session + $injection_session->query_safe("SELECT injection_points_attach('$injection_point', 'wait')"); + # Trigger resize + $resize_session->query_until( + qr/starting_resize/, + q( + \echo starting_resize + SELECT pg_resize_shared_buffers(); + ) + ); + + # Wait until resize actually reaches the injection point using the query session + $query_session->wait_for_event('client backend', $injection_point); + + # Start bufferscan while resize is paused + my $client = $node->background_psql('postgres'); + + # Wake up the injection point from injection session + $injection_session->query_safe("SELECT injection_points_wakeup('$injection_point')"); + + # Wait for the resize operation to complete + $resize_session->query(q(\echo 'done')); + + # Detach injection point from injection session + $injection_session->query_safe("SELECT injection_points_detach('$injection_point')"); + + # Verify resize completed successfully + is($query_session->query_safe("SELECT current_setting('shared_buffers')"), $target_size, + "resize completed successfully to $target_size"); + + # Check buffer pool size using pg_buffercache after resize completion + is($query_session->query_safe("SELECT COUNT(*) FROM pg_buffercache"), calculate_buffer_count($target_size, $block_size), "pg_buffercache COUNT(*) correct after $test_name ($operation_type)"); + + # Wait for client to complete + ok($client->quit, "client succeeded during $test_name ($operation_type)"); +} + +# Test injection points during buffer resize with client connections +my @common_injection_tests = ( + { + name => 'flag setting phase', + injection_point => 'pg-resize-shared-buffers-flag-set', + }, + { + name => 'memory remap phase', + injection_point => 'pgrsb-after-shmem-resize', + }, + { + name => 'resize map barrier complete', + injection_point => 'pgrsb-resize-barrier-sent', + }, +); + +# Test common injection points for both shrinking and expanding +foreach my $test (@common_injection_tests) +{ + # Test shrinking scenario + run_injection_point_test($test->{name}, $test->{injection_point}, '272kB', 'shrinking'); + + # Test expanding scenario + run_injection_point_test($test->{name}, $test->{injection_point}, '400kB', 'expanding'); +} + +my @shrink_only_tests = ( + { + name => 'shrink barrier complete', + injection_point => 'pgrsb-shrink-barrier-sent', + size => '200kB', + } +); +foreach my $test (@shrink_only_tests) +{ + run_injection_point_test($test->{name}, $test->{injection_point}, $test->{size}, 'shrinking only'); +} + +my @expand_only_tests = ( + { + name => 'expand barrier complete', + injection_point => 'pgrsb-expand-barrier-sent', + size => '416kB', + } +); +foreach my $test (@expand_only_tests) +{ + run_injection_point_test($test->{name}, $test->{injection_point}, $test->{size}, 'expanding only'); +} + +# Function to test buffercache scan behavior during resize operations +# This tests that pg_buffercache correctly handles concurrent resize operations +# by pausing the buffercache scan at various points while a resize occurs. +# The expected behavior is that pg_buffercache detects the resize and raises +# an appropriate error "number of shared buffers changed during scan". +sub run_buffercache_injection_test +{ + my ($test_name, $buffercache_injection_point, $target_size, $operation_type) = @_; + + note("Test buffercache with $test_name ($operation_type)"); + + # Attach injection point at middle of buffercache scan + $node->safe_psql('postgres', "SELECT injection_points_attach('$buffercache_injection_point', 'wait')"); + + # Start buffercache query in background - it will pause at injection point + my $buffercache_session = $node->background_psql('postgres'); + $buffercache_session->query_until( + qr/starting_buffercache/, + q( + \echo starting_buffercache + SELECT COUNT(*) FROM pg_buffercache; + ) + ); + + # Wait for buffercache to reach injection point + $node->wait_for_event('client backend', $buffercache_injection_point); + + # Change shared_buffers to target size and resize + $node->safe_psql('postgres', "ALTER SYSTEM SET shared_buffers = '$target_size'"); + $node->safe_psql('postgres', "SELECT pg_reload_conf()"); + $node->safe_psql('postgres', "SELECT pg_resize_shared_buffers()"); + + # Wake up buffercache scan + $node->safe_psql('postgres', "SELECT injection_points_wakeup('$buffercache_injection_point')"); + + $buffercache_session->query(q(\echo 'done')); + eval { $buffercache_session->quit; }; + eval { $node->safe_psql('postgres', "SELECT injection_points_detach('$buffercache_injection_point')"); }; + + # Verify server is still running + my $result; + eval { $result = $node->safe_psql('postgres', "SELECT COUNT(*) FROM pg_buffercache"); }; + is($result, calculate_buffer_count($target_size, $block_size), "Server still running after $test_name ($operation_type)"); +} + +# Test buffercache injection points - pausing buffercache while resize occurs +my @buffercache_injection_tests = ( + # { + # name => 'before the buffer pool scan starts', + # injection_point => 'pg-buffercache-scan-start', + # }, # Basic fail where after buffer change there are valid buffers (NOTE : Buffer fails after a little later then actual currentNBuffers Why?) + { + name => 'before getting buffer description - 2', + injection_point => 'pg-buffercache-after-getdesc', + }, # Failure where after buffer change there are no valid buffers; + +foreach my $test (@buffercache_injection_tests) +{ + # Test with shrinking + run_buffercache_injection_test($test->{name}, $test->{injection_point}, '256kB', 'shrinking'); + + # Test with expanding + run_buffercache_injection_test($test->{name}, $test->{injection_point}, '384kB', 'expanding'); +} + +$injection_session->quit; +$query_session->quit; +$resize_session->quit; + +done_testing(); \ No newline at end of file diff --git a/src/backend/storage/buffer/freelist.c b/src/backend/storage/buffer/freelist.c index 0aae2c08bc41d..15a8d196a93af 100644 --- a/src/backend/storage/buffer/freelist.c +++ b/src/backend/storage/buffer/freelist.c @@ -887,3 +887,12 @@ StrategyRejectBuffer(BufferAccessStrategy strategy, BufferDesc *buf, bool from_r return true; } + +/* + * GetActiveBufferCount -- return the current active buffer count + */ +int +GetActiveBufferCount(void) +{ + return pg_atomic_read_u32(&StrategyControl->activeNBuffers); +} \ No newline at end of file diff --git a/src/backend/utils/misc/guc.c b/src/backend/utils/misc/guc.c index 6bc35a1132b56..a05fb38d3a12c 100644 --- a/src/backend/utils/misc/guc.c +++ b/src/backend/utils/misc/guc.c @@ -5353,7 +5353,7 @@ ShowGUCOption(const struct config_generic *record, bool use_units) { const struct config_int *conf = &record->_int; - if (conf->show_hook) + if (conf->show_hook && use_units) val = conf->show_hook(); else { diff --git a/src/include/storage/buf_internals.h b/src/include/storage/buf_internals.h index c3daf4e2b3b25..c9d64a82a8556 100644 --- a/src/include/storage/buf_internals.h +++ b/src/include/storage/buf_internals.h @@ -576,6 +576,7 @@ extern void StrategyNotifyBgWriter(int bgwprocno); extern Size StrategyShmemSize(void); extern void StrategyInitialize(bool init); extern void StrategyReset(int activeNBuffers); +extern int GetActiveBufferCount(void); /* buf_table.c */ extern Size BufTableShmemSize(int size);