From dcb803ce42387d3e3aaf88b736da92fcb116f6de Mon Sep 17 00:00:00 2001 From: Mitch MacKenzie Date: Thu, 23 Apr 2026 00:14:44 -0300 Subject: [PATCH 1/3] Vercel Blob for SQLite --- .github/workflows/e2e.yml | 3 + api/index.js | 138 +++++++------- api/vercel.js | 36 ++++ package.json | 1 + test/Dockerfile-blob | 13 ++ test/run-blob-test.sh | 90 +++++++++ test/vercel-blob-emulator/handler.js | 4 + test/vercel-blob-emulator/server.js | 164 +++++++++++++++++ test/vercel-blob-emulator/undici-patch.js | 44 +++++ util/install.js | 7 +- util/sqliteVercelBlob.js | 214 ++++++++++++++++++++++ vercel.json | 4 +- wp/wp-config.php | 4 +- 13 files changed, 646 insertions(+), 76 deletions(-) create mode 100644 api/vercel.js create mode 100644 test/Dockerfile-blob create mode 100755 test/run-blob-test.sh create mode 100644 test/vercel-blob-emulator/handler.js create mode 100644 test/vercel-blob-emulator/server.js create mode 100644 test/vercel-blob-emulator/undici-patch.js create mode 100644 util/sqliteVercelBlob.js diff --git a/.github/workflows/e2e.yml b/.github/workflows/e2e.yml index e15e7357a..207beb29e 100644 --- a/.github/workflows/e2e.yml +++ b/.github/workflows/e2e.yml @@ -14,4 +14,7 @@ jobs: working-directory: ./test - run: ./run-s3-test.sh + working-directory: ./ + + - run: ./run-blob-test.sh working-directory: ./test \ No newline at end of file diff --git a/api/index.js b/api/index.js index 0d62883f4..63a1ea4e8 100644 --- a/api/index.js +++ b/api/index.js @@ -7,89 +7,85 @@ const sandbox = require('../util/sandbox.js'); const readOnly = require('../util/readOnly.js'); const pathToWP = '/tmp/wp'; -let initSqliteS3 = false; +const wpContentPath = pathToWP + '/wp-content'; +const sqlitePluginPath = wpContentPath + '/plugins/sqlite-database-integration'; -// Move the /wp directory to /tmp/wp so that it is writeable. -setup(); - -// This is where all requests to WordPress are routed through. -// See vercel.json or netlify.toml for the redirection rules. -exports.handler = async function (event, context, callback) { - if ((process.env['SQLITE_S3_BUCKET'] || process.env['SERVERLESSWP_DATA_SECRET']) && !initSqliteS3) { - let wpContentPath = pathToWP + '/wp-content'; - let sqlitePluginPath = wpContentPath + '/plugins/sqlite-database-integration'; - await sqliteS3.prepPlugin(wpContentPath, sqlitePluginPath); - - let branchSlug = ''; - let bucketFallback = ''; - - // Vercel - if (process.env['VERCEL']) { - const branch = sqliteS3.branchNameToS3file(process.env['VERCEL_GIT_COMMIT_REF']); - branchSlug = branch ? '-' + branch : ''; - bucketFallback = process.env['VERCEL_PROJECT_ID']; - } +const hasSqliteS3 = !!(process.env['SQLITE_S3_BUCKET'] || process.env['SERVERLESSWP_DATA_SECRET']); +const readOnlyActive = !!process.env['SERVERLESSWP_READ_ONLY_MODE'] + && !['false', '0', 'no'].includes(process.env['SERVERLESSWP_READ_ONLY_MODE'].toLowerCase()); - // Configure the sqliteS3 plugin. - let sqliteS3Config = { - bucket: process.env['SQLITE_S3_BUCKET'] || bucketFallback, - file:`wp-sqlite-s3${branchSlug}.sqlite`, - S3Client: { - credentials: { - "accessKeyId": process.env['SQLITE_S3_API_KEY'] || process.env['VERCEL_PROJECT_ID'], - "secretAccessKey": process.env['SQLITE_S3_API_SECRET'] || process.env['SERVERLESSWP_DATA_SECRET'] - }, - region: process.env['SQLITE_S3_REGION'], - } - }; +let sqliteSelection = null; +let initDone = false; - if (process.env['SQLITE_S3_ENDPOINT']) { - sqliteS3Config.S3Client.endpoint = process.env['SQLITE_S3_ENDPOINT']; +function buildSqliteS3Config(overrides = {}) { + const config = { + bucket: overrides.bucket || process.env['SQLITE_S3_BUCKET'], + file: overrides.file || 'wp-sqlite-s3.sqlite', + S3Client: { + credentials: { + accessKeyId: overrides.accessKeyId || process.env['SQLITE_S3_API_KEY'], + secretAccessKey: process.env['SQLITE_S3_API_SECRET'] || process.env['SERVERLESSWP_DATA_SECRET'], + }, + region: process.env['SQLITE_S3_REGION'], } + }; - if (process.env['SQLITE_S3_FORCE_PATH_STYLE'] || process.env['SERVERLESSWP_DATA_SECRET']) { - sqliteS3Config.S3Client.forcePathStyle = true; - } - - if (process.env['SERVERLESSWP_DATA_SECRET']) { - sqliteS3Config.S3Client.endpoint = 'https://data.serverlesswp.com'; - sqliteS3Config.onAuthError = () => sandbox.register( - sqliteS3Config.bucket, - process.env['SERVERLESSWP_DATA_SECRET'] - ); - } - - sqliteS3.config(sqliteS3Config); - initSqliteS3 = true; + if (process.env['SQLITE_S3_ENDPOINT']) { + config.S3Client.endpoint = process.env['SQLITE_S3_ENDPOINT']; } - // Send the request (event object) to the serverlesswp library. - // It includes the PHP server that allows WordPress to handle the request. - let response = await serverlesswp({docRoot: pathToWP, event: event}); - - // Check to see if the database environment variables are in place. - let checkInstall = validate(response); - - if (checkInstall) { - return checkInstall; + if (process.env['SQLITE_S3_FORCE_PATH_STYLE'] || process.env['SERVERLESSWP_DATA_SECRET']) { + config.S3Client.forcePathStyle = true; } - else { - // Return the response for serving. - return response; + + if (process.env['SERVERLESSWP_DATA_SECRET']) { + config.S3Client.endpoint = 'https://data.serverlesswp.com'; + config.onAuthError = () => sandbox.register(config.bucket, process.env['SERVERLESSWP_DATA_SECRET']); } -} -if (process.env['SERVERLESSWP_READ_ONLY_MODE'] && !['false', '0', 'no'].includes(process.env['SERVERLESSWP_READ_ONLY_MODE'].toLowerCase())) { - // Register before sqliteS3 so blocked requests force a response before the S3 fetch. - serverlesswp.registerPlugin(readOnly); + return config; } -if (process.env['SQLITE_S3_BUCKET'] || process.env['SERVERLESSWP_DATA_SECRET']) { - // Register the sqlite serverlesswp plugin. - serverlesswp.registerPlugin(sqliteS3); +// Platform entry points (api/vercel.js) may call this before the first request +// to override which sqlite plugin gets used or pass a platform-tuned config. +function useSqlitePlugin(plugin, config) { + sqliteSelection = { plugin, config }; } -if (process.env['SERVERLESSWP_DATA_SECRET']) { - // Register the sandbox widget plugin. - serverlesswp.registerPlugin(sandbox); +exports.useSqlitePlugin = useSqlitePlugin; +exports.buildSqliteS3Config = buildSqliteS3Config; + +// Default: use sqliteS3 when its env vars are present. +if (hasSqliteS3) { + useSqlitePlugin(sqliteS3, buildSqliteS3Config()); } + +// Move the /wp directory to /tmp/wp so that it is writeable. +setup(); + +// This is where all requests to WordPress are routed through. +// See netlify.toml or serverless.yml for the redirection rules. +// On Vercel, requests come through api/vercel.js which then calls this handler. +exports.handler = async function (event, context, callback) { + if (!initDone) { + // Register readOnly first so blocked mutations short-circuit before the + // sqlite plugin tries to hit storage. + if (readOnlyActive) { + serverlesswp.registerPlugin(readOnly); + } + if (sqliteSelection) { + const { plugin, config } = sqliteSelection; + await plugin.prepPlugin(wpContentPath, sqlitePluginPath); + plugin.config(config); + serverlesswp.registerPlugin(plugin); + } + if (process.env['SERVERLESSWP_DATA_SECRET']) { + serverlesswp.registerPlugin(sandbox); + } + initDone = true; + } + + const response = await serverlesswp({ docRoot: pathToWP, event: event }); + const checkInstall = validate(response); + return checkInstall || response; +}; diff --git a/api/vercel.js b/api/vercel.js new file mode 100644 index 000000000..f3d536618 --- /dev/null +++ b/api/vercel.js @@ -0,0 +1,36 @@ +// Vercel-specific entry point. Adds Vercel Blob as a storage option and +// layers VERCEL_* env var fallbacks onto the SQLite + S3 config, then delegates +// to the core handler in api/index.js. + +const core = require('./index.js'); +const sqliteS3 = require('../util/sqliteS3.js'); +const sqliteVercelBlob = require('../util/sqliteVercelBlob.js'); + +const hasSqliteS3 = !!(process.env['SQLITE_S3_BUCKET'] || process.env['SERVERLESSWP_DATA_SECRET']); +const useVercelBlob = !!process.env['BLOB_READ_WRITE_TOKEN'] && !hasSqliteS3; + +// Encode the current Vercel git branch so each branch gets its own database. +function branchSlug() { + if (process.env['VERCEL_GIT_COMMIT_REF']) { + const branch = encodeURIComponent(process.env['VERCEL_GIT_COMMIT_REF']); + return branch ? '-' + branch : ''; + } + return ''; +} + +if (hasSqliteS3) { + // Re-configure the S3 plugin with Vercel-aware overrides: the sandbox flow + // uses VERCEL_PROJECT_ID as the bucket name and the API key fallback, and + // branch-aware filenames keep preview deploys isolated. + core.useSqlitePlugin(sqliteS3, core.buildSqliteS3Config({ + bucket: process.env['SQLITE_S3_BUCKET'] || process.env['VERCEL_PROJECT_ID'], + file: `wp-sqlite-s3${branchSlug()}.sqlite`, + accessKeyId: process.env['SQLITE_S3_API_KEY'] || process.env['VERCEL_PROJECT_ID'], + })); +} else if (useVercelBlob) { + core.useSqlitePlugin(sqliteVercelBlob, { + pathname: `wp-sqlite${branchSlug()}.sqlite`, + }); +} + +exports.handler = core.handler; diff --git a/package.json b/package.json index 9ed6e9b27..c5d590217 100644 --- a/package.json +++ b/package.json @@ -5,6 +5,7 @@ "license": "MIT", "dependencies": { "@aws-sdk/client-s3": "^3.772.0", + "@vercel/blob": "^2.3.3", "serverlesswp": "^0.5.1", "sqlite3": "^5.1.7" }, diff --git a/test/Dockerfile-blob b/test/Dockerfile-blob new file mode 100644 index 000000000..3b6472075 --- /dev/null +++ b/test/Dockerfile-blob @@ -0,0 +1,13 @@ +FROM public.ecr.aws/lambda/nodejs:22 + +COPY package.json ${LAMBDA_TASK_ROOT}/ +RUN npm install --omit=optional --omit=dev + +COPY api/ ${LAMBDA_TASK_ROOT}/api/ +COPY util/ ${LAMBDA_TASK_ROOT}/util/ +COPY wp/ ${LAMBDA_TASK_ROOT}/wp/ +COPY test/installer.php ${LAMBDA_TASK_ROOT}/wp/ +COPY test/vercel-blob-emulator/undici-patch.js ${LAMBDA_TASK_ROOT}/blob-test/undici-patch.js +COPY test/vercel-blob-emulator/handler.js ${LAMBDA_TASK_ROOT}/blob-test/handler.js + +CMD [ "blob-test/handler.handler" ] diff --git a/test/run-blob-test.sh b/test/run-blob-test.sh new file mode 100755 index 000000000..43d00c535 --- /dev/null +++ b/test/run-blob-test.sh @@ -0,0 +1,90 @@ +#!/bin/bash +set -euo pipefail + +cd .. +docker build -t serverlesswp-blob-test -f test/Dockerfile-blob . +cd test + +# Clean up any leftovers from a previous run +pkill -f "node proxy.js" 2>/dev/null || true +pkill -f "node vercel-blob-emulator/server.js" 2>/dev/null || true +docker stop serverlesswp-test serverlesswp-test-readonly 2>/dev/null || true +docker rm serverlesswp-test serverlesswp-test-readonly 2>/dev/null || true + +VERCEL=${VERCEL:-1} +VERCEL_GIT_COMMIT_REF=${VERCEL_GIT_COMMIT_REF:-test_branch} + +# Token format: vercel_blob_rw__. The mock derives the storeId +# and rebuilds the hardcoded blob download URL from it. Must match STORE_ID. +STORE_ID=test +BLOB_TOKEN="vercel_blob_rw_${STORE_ID}_testsecret" +FAKE_BLOB_PORT=7000 + +PORT=$FAKE_BLOB_PORT STORE_ID=$STORE_ID ACCESS=private \ + node vercel-blob-emulator/server.js > /dev/null 2>&1 & +FAKE_BLOB_PID=$! + +# Wait for the emulator to be ready +until curl -s -o /dev/null -w "%{http_code}" http://localhost:$FAKE_BLOB_PORT/does-not-exist | grep -q 404; do sleep 1; done + +# host-gateway lets the container reach the host-side blob emulator via +# http://host.docker.internal. Works on Docker Desktop and Docker Engine >= 20.10. +docker run \ + --add-host=host.docker.internal:host-gateway \ + -e BLOB_READ_WRITE_TOKEN=$BLOB_TOKEN \ + -e VERCEL_BLOB_API_URL=http://host.docker.internal:$FAKE_BLOB_PORT \ + -e VERCEL_BLOB_MOCK_URL=http://host.docker.internal:$FAKE_BLOB_PORT \ + -e VERCEL=$VERCEL -e VERCEL_GIT_COMMIT_REF=$VERCEL_GIT_COMMIT_REF \ + -e SERVERLESSWP_TESTING=1 \ + -e SERVERLESSWP_READ_ONLY_MODE=false \ + -p 9000:8080 \ + -d --name serverlesswp-test serverlesswp-blob-test + +node proxy.js > /dev/null 2>&1 & +PROXY_PID=$! + +cleanup() { + kill $PROXY_PID 2>/dev/null || true + kill $FAKE_BLOB_PID 2>/dev/null || true + docker stop serverlesswp-test 2>/dev/null || true + docker rm serverlesswp-test 2>/dev/null || true + docker stop serverlesswp-test-readonly 2>/dev/null || true + docker rm serverlesswp-test-readonly 2>/dev/null || true +} +trap cleanup EXIT + +until curl -sfko /dev/null https://localhost:3000/; do sleep 1; done + +echo "Testing static file serving..." +static_check=$(curl -sk -o /dev/null -w "%{http_code} %{content_type}" https://localhost:3000/wp-includes/css/classic-themes.css) +http_code=${static_check%% *} +content_type=${static_check#* } +[[ "$http_code" == "200" ]] || { echo "Static file test FAILED: expected 200, got $http_code"; exit 1; } +[[ "$content_type" == *"text/css"* ]] || { echo "Static file content-type FAILED: expected text/css, got $content_type"; exit 1; } +echo "Static file test passed." + +npm install +npx playwright install chromium +ldconfig -p | grep -q libnspr4 || sudo env PATH="$PATH" node_modules/.bin/playwright install-deps chromium +SCREENSHOTS=${SCREENSHOTS:-} npx playwright test e2e.spec.js "$@" + +# Read-only mode tests — reuse the populated emulator state from above. +echo "Starting read-only mode tests..." +docker stop serverlesswp-test +docker rm serverlesswp-test + +docker run \ + --add-host=host.docker.internal:host-gateway \ + -e BLOB_READ_WRITE_TOKEN=$BLOB_TOKEN \ + -e VERCEL_BLOB_API_URL=http://host.docker.internal:$FAKE_BLOB_PORT \ + -e VERCEL_BLOB_MOCK_URL=http://host.docker.internal:$FAKE_BLOB_PORT \ + -e VERCEL=$VERCEL -e VERCEL_GIT_COMMIT_REF=$VERCEL_GIT_COMMIT_REF \ + -e SERVERLESSWP_TESTING=1 \ + -e SERVERLESSWP_READ_ONLY_MODE=1 \ + -e SERVERLESSWP_READ_ONLY_CACHE_MAX_AGE=3600 \ + -p 9000:8080 \ + -d --name serverlesswp-test-readonly serverlesswp-blob-test + +until curl -sfko /dev/null https://localhost:3000/; do sleep 1; done + +SKIP_AUTH=1 SCREENSHOTS=${SCREENSHOTS:-} npx playwright test e2e-read-only.spec.js "$@" diff --git a/test/vercel-blob-emulator/handler.js b/test/vercel-blob-emulator/handler.js new file mode 100644 index 000000000..d0adc5e16 --- /dev/null +++ b/test/vercel-blob-emulator/handler.js @@ -0,0 +1,4 @@ +// Test-only Lambda handler that applies the undici fetch patch before the real +// handler loads the Vercel Blob SDK. Used as the CMD for the blob-test image. +require('./undici-patch.js'); +exports.handler = require('../api/vercel.js').handler; diff --git a/test/vercel-blob-emulator/server.js b/test/vercel-blob-emulator/server.js new file mode 100644 index 000000000..c41da0eea --- /dev/null +++ b/test/vercel-blob-emulator/server.js @@ -0,0 +1,164 @@ +// Minimal Vercel Blob mock for e2e tests. +// Implements the endpoints our sqliteVercelBlob plugin relies on: +// PUT /?pathname= upload (honors x-if-match, x-allow-overwrite) +// GET /?url= head metadata +// GET / download (honors If-None-Match) +// POST /delete delete (honors x-if-match for single URL) +// +// ETags use SHA-1 of the body, wrapped in double quotes (RFC 7232). + +const http = require('node:http'); +const crypto = require('node:crypto'); +const { URL } = require('node:url'); + +const PORT = parseInt(process.env.PORT || '7000', 10); +const STORE_ID = process.env.STORE_ID || 'test'; +const ACCESS = process.env.ACCESS || 'private'; +const BASE_HOST = `${STORE_ID}.${ACCESS}.blob.vercel-storage.com`; + +const store = new Map(); + +function computeEtag(buffer) { + return `"${crypto.createHash('sha1').update(buffer).digest('hex')}"`; +} + +function jsonError(res, status, code, message = '') { + res.statusCode = status; + res.setHeader('content-type', 'application/json'); + res.end(JSON.stringify({ error: { code, message } })); +} + +function metadata(pathname, entry) { + const url = `https://${BASE_HOST}/${pathname}`; + return { + url, + downloadUrl: url + '?download=1', + pathname, + contentType: entry.contentType, + contentDisposition: `attachment; filename="${pathname.split('/').pop()}"`, + cacheControl: 'public, max-age=31536000, must-revalidate', + size: entry.body.length, + uploadedAt: entry.uploadedAt, + etag: entry.etag, + }; +} + +function readBody(req) { + return new Promise((resolve, reject) => { + const chunks = []; + req.on('data', c => chunks.push(c)); + req.on('end', () => resolve(Buffer.concat(chunks))); + req.on('error', reject); + }); +} + +function extractPathname(input) { + try { + return new URL(input).pathname.slice(1); + } catch { + return input.startsWith('/') ? input.slice(1) : input; + } +} + +const server = http.createServer(async (req, res) => { + const url = new URL(req.url, `http://${req.headers.host || 'localhost'}`); + const method = req.method; + + try { + if (method === 'PUT' && url.pathname === '/' && url.searchParams.has('pathname')) { + const pathname = url.searchParams.get('pathname'); + const body = await readBody(req); + const current = store.get(pathname); + const ifMatch = req.headers['x-if-match']; + const allowOverwrite = req.headers['x-allow-overwrite'] === '1'; + + if (ifMatch) { + if (!current || current.etag !== ifMatch) { + return jsonError(res, 412, 'precondition_failed', 'ETag mismatch'); + } + } else if (current && !allowOverwrite) { + return jsonError(res, 400, 'bad_request', 'Blob exists and overwrite is not allowed'); + } + + const entry = { + body, + etag: computeEtag(body), + contentType: req.headers['x-content-type'] || 'application/octet-stream', + uploadedAt: new Date().toISOString(), + }; + store.set(pathname, entry); + + res.statusCode = 200; + res.setHeader('content-type', 'application/json'); + res.end(JSON.stringify(metadata(pathname, entry))); + return; + } + + if (method === 'GET' && url.pathname === '/' && url.searchParams.has('url')) { + const pathname = extractPathname(url.searchParams.get('url')); + const entry = store.get(pathname); + if (!entry) { + return jsonError(res, 404, 'not_found', 'Blob not found'); + } + res.statusCode = 200; + res.setHeader('content-type', 'application/json'); + res.end(JSON.stringify(metadata(pathname, entry))); + return; + } + + if (method === 'POST' && url.pathname === '/delete') { + const body = await readBody(req); + let urls = []; + try { ({ urls = [] } = JSON.parse(body.toString() || '{}')); } catch {} + const ifMatch = req.headers['x-if-match']; + for (const u of urls) { + const pathname = extractPathname(u); + const current = store.get(pathname); + if (ifMatch && current && current.etag !== ifMatch) { + return jsonError(res, 412, 'precondition_failed', 'ETag mismatch'); + } + store.delete(pathname); + } + res.statusCode = 200; + res.setHeader('content-type', 'application/json'); + res.end('{}'); + return; + } + + if (method === 'GET') { + const pathname = url.pathname.slice(1); + const entry = store.get(pathname); + if (!entry) { + res.statusCode = 404; + res.end(); + return; + } + const lastModified = new Date(entry.uploadedAt).toUTCString(); + if (req.headers['if-none-match'] === entry.etag) { + res.statusCode = 304; + res.setHeader('etag', entry.etag); + res.setHeader('last-modified', lastModified); + res.end(); + return; + } + res.statusCode = 200; + res.setHeader('etag', entry.etag); + res.setHeader('content-type', entry.contentType); + res.setHeader('content-length', entry.body.length); + res.setHeader('last-modified', lastModified); + res.end(entry.body); + return; + } + + res.statusCode = 405; + res.end(); + } catch (err) { + console.error('vercel-blob-emulator error:', err); + res.statusCode = 500; + res.end(); + } +}); + +server.listen(PORT, () => { + console.log(`vercel-blob-emulator listening on :${PORT} (storeId=${STORE_ID}, access=${ACCESS})`); +}); diff --git a/test/vercel-blob-emulator/undici-patch.js b/test/vercel-blob-emulator/undici-patch.js new file mode 100644 index 000000000..991950efc --- /dev/null +++ b/test/vercel-blob-emulator/undici-patch.js @@ -0,0 +1,44 @@ +// Test-only monkey patch: redirects fetch() calls to `*.blob.vercel-storage.com` +// to the Vercel Blob emulator at VERCEL_BLOB_MOCK_URL. +// +// The Vercel Blob SDK hardcodes download URLs (`constructBlobUrl` in +// @vercel/blob/dist/index.cjs) and validates URL inputs to get() against +// `.blob.vercel-storage.com`, so we can't override the host via env vars. The +// SDK's compiled CJS does `_undici.fetch.call(...)` against the shared undici +// module exports, so replacing `undici.fetch` after it's been required is +// picked up by every subsequent SDK call. +// +// Put/head/list/delete go through VERCEL_BLOB_API_URL (set separately). + +const undici = require('undici'); + +const MOCK_URL = process.env.VERCEL_BLOB_MOCK_URL; +if (MOCK_URL) { + const mockBase = new URL(MOCK_URL); + const originalFetch = undici.fetch; + + undici.fetch = function patchedFetch(input, opts) { + let urlStr; + if (typeof input === 'string') { + urlStr = input; + } else if (input instanceof URL) { + urlStr = input.toString(); + } else if (input && typeof input.url === 'string') { + urlStr = input.url; + } + + if (urlStr) { + try { + const parsed = new URL(urlStr); + if (parsed.hostname.endsWith('.blob.vercel-storage.com')) { + const rewritten = new URL(parsed.pathname + parsed.search, mockBase).toString(); + return originalFetch.call(this, rewritten, opts); + } + } catch {} + } + + return originalFetch.call(this, input, opts); + }; + + console.log('[undici-patch] redirecting *.blob.vercel-storage.com to', MOCK_URL); +} diff --git a/util/install.js b/util/install.js index 8352f41c8..6ee38eea8 100644 --- a/util/install.js +++ b/util/install.js @@ -1,5 +1,6 @@ exports.validate = function(response) { let hasSqliteS3 = false; + let hasVercelBlob = false; let hasSQL = false; let platform = 'AWS'; let dashboardLink; @@ -8,6 +9,10 @@ exports.validate = function(response) { hasSqliteS3 = true; } + if (process.env['BLOB_READ_WRITE_TOKEN']) { + hasVercelBlob = true; + } + if (process.env['DATABASE'] && process.env['USERNAME'] && process.env['PASSWORD'] && process.env['HOST']) { hasSQL = true; } @@ -24,7 +29,7 @@ exports.validate = function(response) { dashboardLink = 'https://console.aws.amazon.com/console/home'; } - if (!hasSQL && !hasSqliteS3) { + if (!hasSQL && !hasSqliteS3 && !hasVercelBlob) { let data = {}; data.dashboardLink = dashboardLink; data.platform = platform; diff --git a/util/sqliteVercelBlob.js b/util/sqliteVercelBlob.js new file mode 100644 index 000000000..938947956 --- /dev/null +++ b/util/sqliteVercelBlob.js @@ -0,0 +1,214 @@ +const sqlite3 = require('sqlite3').verbose(); +const fs = require('fs').promises; +const fsSync = require('fs'); +const { Readable } = require('node:stream'); +const { pipeline } = require('node:stream/promises'); +const { get, put, BlobPreconditionFailedError, BlobNotFoundError } = require('@vercel/blob'); + +const ETAG_CACHE = '/tmp/etag-vercel-blob.txt'; +let sqliteFilePath = '/tmp/wp-sqlite-s3.sqlite'; + +let init = false; +let db; +let dataVersion; +let _config; + +exports.name = 'ServerlessWP sqlite Vercel Blob'; + +exports.config = function(config) { + _config = config; +} + +exports.preRequest = async function(event) { + if (!_config?.pathname) { + throw new Error("Vercel Blob pathname is required"); + } + + const cachedEtag = await getEtag(); + + const options = { access: 'private' }; + if (_config.token) { + options.token = _config.token; + } + if (cachedEtag) { + options.ifNoneMatch = cachedEtag; + } + + try { + const response = await get(_config.pathname, options); + + if (!response) { + // Blob doesn't exist yet - behave like a new site. + return; + } + + if (response.statusCode === 304) { + // No need to download, just use existing local file. + db = new sqlite3.Database(sqliteFilePath); + dataVersion = await getDataVersion(); + return; + } + + if (response.statusCode === 200 && response.stream) { + await pipeline( + Readable.fromWeb(response.stream), + fsSync.createWriteStream(sqliteFilePath) + ); + db = new sqlite3.Database(sqliteFilePath); + dataVersion = await getDataVersion(); + if (response.blob?.etag) { + await setEtag(response.blob.etag); + } + } + } + catch (err) { + if (err instanceof BlobNotFoundError) { + console.log('Database blob not found'); + return; + } + console.error('Error fetching database blob:', err); + } +} + +exports.postRequest = async function(event, response) { + try { + const dbExists = await exists(sqliteFilePath); + if (!db) { + if (dbExists) { + db = new sqlite3.Database(sqliteFilePath); + dataVersion = null; + } else { + return; + } + } + const versionNow = await getDataVersion(); + + // See if the db has been mutated, if so, send the changes to the blob store. + const readOnly = process.env['SERVERLESSWP_READ_ONLY_MODE']; + const readOnlyActive = readOnly && !['false', '0', 'no'].includes(readOnly.toLowerCase()); + if (!readOnlyActive && dataVersion !== versionNow) { + if (dbExists) { + try { + await dbClose(); + + const sqliteContent = await fs.readFile(sqliteFilePath); + const currentEtag = await getEtag(); + + const putOptions = { + access: 'private', + allowOverwrite: true, + addRandomSuffix: false, + }; + if (_config.token) { + putOptions.token = _config.token; + } + if (currentEtag) { + putOptions.ifMatch = currentEtag; + } + + const putResponse = await put(_config.pathname, sqliteContent, putOptions); + if (putResponse?.etag) { + await setEtag(putResponse.etag); + } + return; + } + catch (err) { + console.log(err); + const errResponse = { + statusCode: 500, + body: 'Database error. This can happen when simultaneous database updates happen. Re-try your request.' + } + if (err instanceof BlobPreconditionFailedError) { + errResponse.retry = true; + console.log('Retrying database save to Vercel Blob because of a conflicting update.'); + } + return errResponse; + } + } + } + + await dbClose(); + } + catch (err) { + console.log(err); + } +} + +async function getEtag() { + try { + return await fs.readFile(ETAG_CACHE, 'utf8'); + } catch (err) { + return ''; + } +} + +async function setEtag(newEtag) { + await fs.writeFile(ETAG_CACHE, newEtag); +} + +async function getDataVersion() { + return new Promise((resolve, reject) => { + if (!db) { reject('No db') } + try { + db.get("PRAGMA data_version", (err, row) => { + if (err) { + reject(err); + } else { + resolve(row['data_version']); + } + }); + } + catch (err) { + reject(err); + } + }); +} + +async function dbClose() { + return new Promise((resolve, reject) => { + if (!db) { reject('No db') } + try { + db.close((closeErr) => { + if (closeErr) { + reject(closeErr); + } + resolve(); + }); + } + catch (err) { + reject(err); + } + }); +} + +async function exists(path) { + try { + await fs.access(path); + return true; + } catch (error) { + return false; + } +} + +// Put the sqlite db class in place if not already there. +// Paths should reference where they've been setup in /tmp. +exports.prepPlugin = async function (wpContentPath, sqlitePluginPath) { + if (!init) { + try { + const pluginPackagePath = sqlitePluginPath; + const oldPath = pluginPackagePath + '/db.copy'; + const newPath = wpContentPath + '/db.php'; + await fs.copyFile(oldPath, newPath); + const content = await fs.readFile(newPath, 'utf8'); + const modifiedContent = content + .replace(new RegExp(/{SQLITE_IMPLEMENTATION_FOLDER_PATH}/, 'g'), pluginPackagePath) + .replace(new RegExp(/{SQLITE_PLUGIN}/, 'g'), 'sqlite-database-integration/load.php'); + + await fs.writeFile(newPath, modifiedContent); + init = true; + } + catch (err) { + console.log(err); + } + } +} diff --git a/vercel.json b/vercel.json index baae387a8..2773fcf25 100644 --- a/vercel.json +++ b/vercel.json @@ -6,7 +6,7 @@ }, { "source": "/(.*)", - "destination": "/api/index.js" + "destination": "/api/vercel.js" } ], "headers": [ @@ -21,7 +21,7 @@ } ], "functions": { - "api/index.js": { + "api/vercel.js": { "includeFiles": "wp/**", "maxDuration": 60 } diff --git a/wp/wp-config.php b/wp/wp-config.php index 412233711..0e7faa018 100644 --- a/wp/wp-config.php +++ b/wp/wp-config.php @@ -129,8 +129,8 @@ define('DISALLOW_FILE_EDIT', true ); define('DISALLOW_FILE_MODS', true ); -// If using SQLite + S3 instead of MySQL/MariaDB. -if (isset($_ENV['SQLITE_S3_BUCKET']) || isset($_ENV['SERVERLESSWP_DATA_SECRET'])) { +// If using SQLite + S3 or SQLite + Vercel Blob instead of MySQL/MariaDB. +if (isset($_ENV['SQLITE_S3_BUCKET']) || isset($_ENV['SERVERLESSWP_DATA_SECRET']) || isset($_ENV['BLOB_READ_WRITE_TOKEN'])) { define('DB_DIR', '/tmp'); define('DB_FILE', 'wp-sqlite-s3.sqlite'); define('DB_NAME', 'wp-sqlite'); From 9867d0c1019436b01f0ff588a0f58aec037a6170 Mon Sep 17 00:00:00 2001 From: Mitch MacKenzie Date: Thu, 23 Apr 2026 00:19:20 -0300 Subject: [PATCH 2/3] Fix path to test script --- .github/workflows/e2e.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/e2e.yml b/.github/workflows/e2e.yml index 207beb29e..20f493f37 100644 --- a/.github/workflows/e2e.yml +++ b/.github/workflows/e2e.yml @@ -14,7 +14,7 @@ jobs: working-directory: ./test - run: ./run-s3-test.sh - working-directory: ./ + working-directory: ./test - run: ./run-blob-test.sh working-directory: ./test \ No newline at end of file From 531e7e9165a7173fd82b08b8fc92c01ae6407160 Mon Sep 17 00:00:00 2001 From: Mitch MacKenzie Date: Tue, 5 May 2026 23:35:06 -0300 Subject: [PATCH 3/3] Update with concurrency safety changes --- api/vercel.js | 3 +- test/run-s3-test.sh | 7 + test/sqliteS3.concurrency.test.js | 278 ++++++++++++++++++++++++++++++ util/sqliteS3.js | 188 +++++++++++++------- util/sqliteVercelBlob.js | 173 ++++++++++++------- wp/wp-config.php | 13 +- 6 files changed, 534 insertions(+), 128 deletions(-) create mode 100644 test/sqliteS3.concurrency.test.js diff --git a/api/vercel.js b/api/vercel.js index f3d536618..82579c254 100644 --- a/api/vercel.js +++ b/api/vercel.js @@ -33,4 +33,5 @@ if (hasSqliteS3) { }); } -exports.handler = core.handler; +module.exports = core.handler; +module.exports.handler = core.handler; diff --git a/test/run-s3-test.sh b/test/run-s3-test.sh index 90bb910ce..df345d231 100755 --- a/test/run-s3-test.sh +++ b/test/run-s3-test.sh @@ -1,6 +1,13 @@ #!/bin/bash set -euo pipefail +# Unit tests for the sqliteS3 plugin — fast, no Docker. Run first so failures +# don't waste time on container builds. Requires root deps installed +# (npm install at the repo root). +echo "Running sqliteS3 unit tests..." +(cd .. && npm install --silent >/dev/null) +node --test sqliteS3.concurrency.test.js + ./build-test.sh # Clean up any leftovers from a previous run diff --git a/test/sqliteS3.concurrency.test.js b/test/sqliteS3.concurrency.test.js new file mode 100644 index 000000000..cd2bb1a29 --- /dev/null +++ b/test/sqliteS3.concurrency.test.js @@ -0,0 +1,278 @@ +// Concurrency tests for util/sqliteS3.js. +// +// These exercise the per-invocation working-file model: each request gets +// its own copy of the SQLite database, isolating concurrent requests on the +// same warm instance. + +const test = require('node:test'); +const assert = require('node:assert'); +const fs = require('fs').promises; +const path = require('path'); +const os = require('os'); +const sqlite3 = require('sqlite3').verbose(); + +const sqliteS3 = require('../util/sqliteS3.js'); + +const ETAG_CACHE = '/tmp/etag.txt'; +const CACHE_FILE = '/tmp/wp-sqlite-cache.sqlite'; + +// Build a small valid SQLite db file in memory and return its bytes. +async function buildDbBytes(seedRow) { + const tmp = path.join(os.tmpdir(), `seed-${Date.now()}-${Math.random()}.sqlite`); + await new Promise((resolve, reject) => { + const db = new sqlite3.Database(tmp); + db.serialize(() => { + db.run('CREATE TABLE t (v TEXT)'); + db.run('INSERT INTO t VALUES (?)', [seedRow], (err) => err ? reject(err) : null); + db.close((err) => err ? reject(err) : resolve()); + }); + }); + const bytes = await fs.readFile(tmp); + await fs.unlink(tmp); + return bytes; +} + +function makeMockS3({ initialBody, initialEtag = 'etag-1' }) { + const state = { + body: initialBody, + etag: initialEtag, + getCalls: 0, + putCalls: 0, + // Force the next N PUTs to fail with 412. + forcePutPreconditionFailures: 0, + }; + const client = { + async send(command) { + const name = command.constructor.name; + if (name === 'GetObjectCommand') { + state.getCalls++; + const input = command.input; + if (input.IfNoneMatch && input.IfNoneMatch === state.etag) { + const err = new Error('Not Modified'); + err.$metadata = { httpStatusCode: 304 }; + throw err; + } + return { Body: state.body, ETag: state.etag }; + } + if (name === 'PutObjectCommand') { + state.putCalls++; + const input = command.input; + if (state.forcePutPreconditionFailures > 0) { + state.forcePutPreconditionFailures--; + const err = new Error('Precondition Failed'); + err.$metadata = { httpStatusCode: 412 }; + throw err; + } + if (input.IfMatch && input.IfMatch !== state.etag) { + const err = new Error('Precondition Failed'); + err.$metadata = { httpStatusCode: 412 }; + throw err; + } + state.body = input.Body; + state.etag = 'etag-' + (state.putCalls + 1); + return { ETag: state.etag }; + } + throw new Error('Unexpected command: ' + name); + }, + }; + return { client, state }; +} + +// Make instances of the SDK command classes (without depending on the SDK +// directly). The mock checks .constructor.name, so define matching classes. +class GetObjectCommand { constructor(input) { this.input = input; } } +class PutObjectCommand { constructor(input) { this.input = input; } } + +async function cleanupTmp() { + for (const p of [ETAG_CACHE, CACHE_FILE]) { + try { await fs.unlink(p); } catch (e) {} + } + // Remove leftover working files from previous test runs. + const entries = await fs.readdir('/tmp'); + await Promise.all(entries + .filter(e => e.startsWith('wp-sqlite-') && e !== 'wp-sqlite-cache.sqlite') + .map(e => fs.unlink('/tmp/' + e).catch(() => {}))); +} + +test.beforeEach(async () => { + await cleanupTmp(); +}); + +test('concurrent preRequest calls produce distinct working files', async () => { + const body = await buildDbBytes('hello'); + const { client } = makeMockS3({ initialBody: body }); + sqliteS3._setClientForTests(client, { bucket: 'b', file: 'f' }); + + const events = [{}, {}, {}]; + await Promise.all(events.map(e => sqliteS3.preRequest(e))); + + const ctxKey = Symbol.for('serverlesswp.sqliteS3.context'); + const paths = events.map(e => e[ctxKey].workingPath); + + assert.strictEqual(new Set(paths).size, 3, 'each invocation gets a unique working path'); + for (const p of paths) { + assert.notStrictEqual(p, CACHE_FILE, 'working path is not the cache path'); + const stat = await fs.stat(p); + assert.ok(stat.size > 0, 'working file exists and is non-empty'); + } + // Each request opened its own db handle. + assert.ok(events.every(e => e[ctxKey].db), 'each context has its own db handle'); + assert.strictEqual(new Set(events.map(e => e[ctxKey].db)).size, 3, 'db handles are distinct'); + + // Clean up + await Promise.all(events.map(e => sqliteS3.postRequest(e, {}))); +}); + +test('concurrent writes do not corrupt each other; S3 arbitrates via ETag', async () => { + const body = await buildDbBytes('seed'); + const { client, state } = makeMockS3({ initialBody: body }); + sqliteS3._setClientForTests(client, { bucket: 'b', file: 'f' }); + + // Run many request pairs concurrently. Each writes a row. + const N = 8; + const events = Array.from({ length: N }, () => ({})); + + await Promise.all(events.map(async (event, i) => { + await sqliteS3.preRequest(event); + const ctxKey = Symbol.for('serverlesswp.sqliteS3.context'); + const ctx = event[ctxKey]; + // Mutate via a *separate* connection — PRAGMA data_version only + // increments when another connection commits. (In prod, PHP writes + // through its own connection while Node holds ctx.db.) + await new Promise((resolve, reject) => { + const writer = new sqlite3.Database(ctx.workingPath); + writer.run('INSERT INTO t VALUES (?)', ['row-' + i], (err) => { + if (err) return reject(err); + writer.close(() => resolve()); + }); + }); + await sqliteS3.postRequest(event, {}); + })); + + // At least one PUT should have succeeded; conflicting PUTs surface 412 + // through the retry response (the host runtime would re-invoke). + assert.ok(state.putCalls >= 1, 'at least one PUT was attempted'); + + // After all the dust settles, there should be no working files left + // and no SQLite handles open. A new preRequest should successfully open + // the cache as a valid (uncorrupted) SQLite database. + const leftovers = (await fs.readdir('/tmp')) + .filter(e => e.startsWith('wp-sqlite-') && e !== 'wp-sqlite-cache.sqlite'); + assert.deepStrictEqual(leftovers, [], 'no working files leaked'); + + // Verify the canonical S3 body is a valid SQLite file (not corrupt). + const verifyPath = '/tmp/sqliteS3-test-verify.sqlite'; + await fs.writeFile(verifyPath, state.body); + const rows = await new Promise((resolve, reject) => { + const db = new sqlite3.Database(verifyPath); + db.all('SELECT v FROM t', (err, rows) => { + if (err) return reject(err); + db.close(() => resolve(rows)); + }); + }); + await fs.unlink(verifyPath); + assert.ok(rows.length >= 1, 'final S3 body is a valid SQLite db with rows'); +}); + +test('412 on PUT returns retry response and does not refresh local cache', async () => { + const body = await buildDbBytes('seed'); + const { client, state } = makeMockS3({ initialBody: body }); + state.forcePutPreconditionFailures = 1; + sqliteS3._setClientForTests(client, { bucket: 'b', file: 'f' }); + + const event = {}; + await sqliteS3.preRequest(event); + const ctxKey = Symbol.for('serverlesswp.sqliteS3.context'); + const ctx = event[ctxKey]; + + // Snapshot the cache file's content/etag before mutating. + const cacheBefore = await fs.readFile(CACHE_FILE); + const etagBefore = await fs.readFile(ETAG_CACHE, 'utf8'); + + await new Promise((resolve, reject) => { + const writer = new sqlite3.Database(ctx.workingPath); + writer.run('INSERT INTO t VALUES (?)', ['conflict'], (err) => { + if (err) return reject(err); + writer.close(() => resolve()); + }); + }); + + const result = await sqliteS3.postRequest(event, {}); + assert.ok(result, 'postRequest returned an error response'); + assert.strictEqual(result.statusCode, 500); + assert.strictEqual(result.retry, true); + + // Cache must NOT have been refreshed with the failed write. + const cacheAfter = await fs.readFile(CACHE_FILE); + const etagAfter = await fs.readFile(ETAG_CACHE, 'utf8'); + assert.deepStrictEqual(cacheAfter, cacheBefore, 'cache file unchanged after 412'); + assert.strictEqual(etagAfter, etagBefore, 'cached etag unchanged after 412'); + + // Working file should be cleaned up. + await assert.rejects(fs.access(ctx.workingPath)); +}); + +test('module state is not shared between concurrent requests', async () => { + // Specifically: request B mutating its db must not affect request A's + // dataVersion/db reference. + const body = await buildDbBytes('shared'); + const { client } = makeMockS3({ initialBody: body }); + sqliteS3._setClientForTests(client, { bucket: 'b', file: 'f' }); + + const a = {}, b = {}; + await sqliteS3.preRequest(a); + await sqliteS3.preRequest(b); + + const ctxKey = Symbol.for('serverlesswp.sqliteS3.context'); + assert.notStrictEqual(a[ctxKey].db, b[ctxKey].db); + assert.notStrictEqual(a[ctxKey].workingPath, b[ctxKey].workingPath); + + // Mutate B; A's data_version (captured at preRequest) should not change + // out from under it. + const aVersionBefore = a[ctxKey].dataVersion; + await new Promise((resolve, reject) => { + b[ctxKey].db.run('INSERT INTO t VALUES (?)', ['from-b'], (err) => err ? reject(err) : resolve()); + }); + assert.strictEqual(a[ctxKey].dataVersion, aVersionBefore, 'A\'s captured dataVersion is unaffected by B'); + + await sqliteS3.postRequest(a, {}); + await sqliteS3.postRequest(b, {}); +}); + +test('client-supplied X-Serverlesswp-Sqlite-File header is stripped', async () => { + const body = await buildDbBytes('seed'); + const { client } = makeMockS3({ initialBody: body }); + sqliteS3._setClientForTests(client, { bucket: 'b', file: 'f' }); + + // Try several casings — gateways may pass through arbitrary case. + const event = { + headers: { + 'X-Serverlesswp-Sqlite-File': '../wp-sqlite-cache.sqlite', + 'x-serverlesswp-sqlite-file': 'wp-sqlite-cache.sqlite', + 'X-SERVERLESSWP-SQLITE-FILE': 'etag.txt', + }, + }; + await sqliteS3.preRequest(event); + + // Exactly one header remains, and its value is the per-invocation file + // (not anything the client supplied). + const matching = Object.entries(event.headers) + .filter(([k]) => k.toLowerCase() === 'x-serverlesswp-sqlite-file'); + assert.strictEqual(matching.length, 1, 'one header survives'); + const [, value] = matching[0]; + assert.match(value, /^wp-sqlite-[0-9a-f-]+\.sqlite$/, 'value is the per-invocation working file name'); + assert.notStrictEqual(value, 'wp-sqlite-cache.sqlite'); + assert.notStrictEqual(value, 'etag.txt'); + + await sqliteS3.postRequest(event, {}); +}); + +// The sqliteS3 module imports the real @aws-sdk PutObjectCommand / +// GetObjectCommand classes. Our mock dispatches by constructor.name, but the +// production code constructs the *real* SDK command instances. So our mock +// must accept those names. Sanity-check that the SDK class names match. +test('SDK command class names match what the mock dispatches on', () => { + const sdk = require('@aws-sdk/client-s3'); + assert.strictEqual(new sdk.GetObjectCommand({}).constructor.name, 'GetObjectCommand'); + assert.strictEqual(new sdk.PutObjectCommand({}).constructor.name, 'PutObjectCommand'); +}); diff --git a/util/sqliteS3.js b/util/sqliteS3.js index 1296a613c..83c27dec0 100644 --- a/util/sqliteS3.js +++ b/util/sqliteS3.js @@ -1,15 +1,14 @@ const sqlite3 = require('sqlite3').verbose(); const fs = require('fs').promises; +const { randomUUID } = require('crypto'); const { S3Client, GetObjectCommand, PutObjectCommand } = require('@aws-sdk/client-s3'); const ETAG_CACHE = '/tmp/etag.txt'; -let sqliteFilePath = '/tmp/wp-sqlite-s3.sqlite'; +const CACHE_FILE = '/tmp/wp-sqlite-cache.sqlite'; +const CONTEXT_KEY = Symbol.for('serverlesswp.sqliteS3.context'); let init = false; -let db; -let dataVersion; let client; -let etag; let _config; exports.name = 'ServerlessWP sqlite s3'; @@ -26,6 +25,13 @@ exports.config = function(config) { } } +// Test-only: inject a mock S3 client without going through the real +// S3Client constructor. +exports._setClientForTests = function(mockClient, config) { + client = mockClient; + _config = config; +} + exports.preRequest = async function(event) { if (!_config?.bucket) { throw new Error("S3 bucket is required"); @@ -37,15 +43,37 @@ exports.preRequest = async function(event) { throw new Error("S3Client config is required"); } - let etag = await getEtag(); + const workingFileName = 'wp-sqlite-' + randomUUID() + '.sqlite'; + const ctx = { + workingPath: '/tmp/' + workingFileName, + db: null, + dataVersion: null, + }; + event[CONTEXT_KEY] = ctx; + + // Tell PHP (wp-config.php) which DB file to open for this request. + // Strip any inbound variant first so a client can't point WP at the + // cache file or another request's working file. wp-config.php also + // passes the value through basename() defensively. + if (!event.headers) event.headers = {}; + for (const k of Object.keys(event.headers)) { + if (k.toLowerCase() === 'x-serverlesswp-sqlite-file') { + delete event.headers[k]; + } + } + event.headers['x-serverlesswp-sqlite-file'] = workingFileName; + + let cachedEtag = await getEtag(); let getCommandParams = { Bucket: _config.bucket, Key: _config.file } - - if (etag) { - getCommandParams.IfNoneMatch = etag; + + // Only send IfNoneMatch if we actually have the cache file locally. + // Otherwise a 304 leaves us with no file to copy. + if (cachedEtag && await exists(CACHE_FILE)) { + getCommandParams.IfNoneMatch = cachedEtag; } const get = new GetObjectCommand(getCommandParams); @@ -54,9 +82,11 @@ exports.preRequest = async function(event) { const response = await client.send(get); if (response) { - await fs.writeFile(sqliteFilePath, response.Body); - db = new sqlite3.Database(sqliteFilePath); - dataVersion = await getDataVersion(); + // Write to a tmp path then atomically rename into place. + // Existing open fds against the old inode keep working. + const tmp = CACHE_FILE + '.' + randomUUID() + '.tmp'; + await fs.writeFile(tmp, response.Body); + await fs.rename(tmp, CACHE_FILE); await setEtag(response.ETag); } else { @@ -66,9 +96,7 @@ exports.preRequest = async function(event) { } catch (err) { if (err.$metadata && err.$metadata.httpStatusCode === 304) { - // No need to download, just use existing file - db = new sqlite3.Database(sqliteFilePath); - dataVersion = await getDataVersion(); + // Cache is up to date; fall through to copy below. } else if (err.$metadata?.httpStatusCode === 403) { if (_config.onAuthError) { @@ -78,78 +106,109 @@ exports.preRequest = async function(event) { console.error('Auto-registration failed:', regErr.message); } } + return; } else if (err.name === 'NoSuchKey') { // Handle case where the file doesn't exist on S3 console.log('Database file not found on server'); + return; } else { // Handle other errors console.error('Error fetching database:', err); + return; } } + + // If we have a cache file (from this request or a previous one), copy it + // to a per-invocation working file and open SQLite against that copy. + // This isolates concurrent requests on the same warm instance. + if (await exists(CACHE_FILE)) { + await fs.copyFile(CACHE_FILE, ctx.workingPath); + ctx.db = new sqlite3.Database(ctx.workingPath); + ctx.dataVersion = await getDataVersion(ctx.db); + } } exports.postRequest = async function(event, response) { + const ctx = event[CONTEXT_KEY]; + if (!ctx) { + return; + } + try { - // If db wasn't initialized but file exists, this is a new database - const dbExists = await exists(sqliteFilePath); - if (!db) { - if (dbExists) { - db = new sqlite3.Database(sqliteFilePath); - dataVersion = null; + // If db wasn't initialized but the working file somehow exists, treat + // it as a new database (e.g. fresh install path). + const workingExists = await exists(ctx.workingPath); + if (!ctx.db) { + if (workingExists) { + ctx.db = new sqlite3.Database(ctx.workingPath); + ctx.dataVersion = null; } else { return; } } - let versionNow = await getDataVersion(); + + let versionNow = await getDataVersion(ctx.db); // See if the db has been mutated, if so, send the changes to s3 - if ((!process.env['SERVERLESSWP_READ_ONLY_MODE'] || ['false', '0', 'no'].includes(process.env['SERVERLESSWP_READ_ONLY_MODE'].toLowerCase())) && dataVersion !== versionNow) { - if (dbExists) { - try { - await dbClose(); - - const sqliteContent = await fs.readFile(sqliteFilePath); - let currentEtag = await getEtag(); - - let putCommandParams = { - Bucket: _config.bucket, - Key: _config.file, - Body: sqliteContent, - } - - if (currentEtag) { - putCommandParams.IfMatch = currentEtag; - } - const command = new PutObjectCommand(putCommandParams); - - const response = await client.send(command); - await setEtag(response.ETag); - // should db be closed? - return; + const readOnly = process.env['SERVERLESSWP_READ_ONLY_MODE'] && !['false', '0', 'no'].includes(process.env['SERVERLESSWP_READ_ONLY_MODE'].toLowerCase()); + if (!readOnly && ctx.dataVersion !== versionNow && workingExists) { + try { + await dbClose(ctx.db); + ctx.db = null; + + const sqliteContent = await fs.readFile(ctx.workingPath); + let currentEtag = await getEtag(); + + let putCommandParams = { + Bucket: _config.bucket, + Key: _config.file, + Body: sqliteContent, + } + + if (currentEtag) { + putCommandParams.IfMatch = currentEtag; + } + const command = new PutObjectCommand(putCommandParams); + + const putResponse = await client.send(command); + + // Refresh the local cache before writing the ETag so etag.txt + // never describes content newer than CACHE_FILE. If the copy + // fails, the old ETag stays on disk and the next request's + // IfNoneMatch will miss, triggering a clean re-fetch from S3. + const tmp = CACHE_FILE + '.' + randomUUID() + '.tmp'; + await fs.copyFile(ctx.workingPath, tmp); + await fs.rename(tmp, CACHE_FILE); + await setEtag(putResponse.ETag); + return; + } + catch (err) { + console.log(err); + let errResponse = { + statusCode: 500, + body: 'Database error. This can happen when simultaneous database updates happen. Re-try your request.' } - catch (err) { - console.log(err); - //@TODO: more descriptive message - let errResponse = { - statusCode: 500, - body: 'Database error. This can happen when simultaneous database updates happen. Re-try your request.' - } - if (err.$metadata && err.$metadata.httpStatusCode === 412) { - errResponse.retry = true; - console.log('Retrying database save to s3 because of a conflicting update.'); - } - return errResponse; + if (err.$metadata && err.$metadata.httpStatusCode === 412) { + errResponse.retry = true; + console.log('Retrying database save to s3 because of a conflicting update.'); } + return errResponse; } } - - await dbClose(); } catch (err) { console.log(err); } + finally { + if (ctx.db) { + try { await dbClose(ctx.db); } catch (e) { /* swallow */ } + ctx.db = null; + } + try { await fs.unlink(ctx.workingPath); } catch (e) { /* file may not exist */ } + delete event[CONTEXT_KEY]; + } } exports.branchNameToS3file = function(branch) { @@ -165,13 +224,12 @@ async function getEtag() { } async function setEtag(newEtag) { - etag = newEtag; await fs.writeFile(ETAG_CACHE, newEtag); } -async function getDataVersion() { +async function getDataVersion(db) { return new Promise((resolve, reject) => { - if (!db) { reject('No db') } + if (!db) { return reject('No db') } try { db.get("PRAGMA data_version", (err, row) => { if (err) { @@ -184,13 +242,13 @@ async function getDataVersion() { catch (err) { reject(err); } - + }); } -async function dbClose() { +async function dbClose(db) { return new Promise((resolve, reject) => { - if (!db) { reject('No db') } + if (!db) { return reject('No db') } try { db.close((closeErr) => { if (closeErr) { @@ -202,7 +260,7 @@ async function dbClose() { catch (err) { reject(err); } - + }); } diff --git a/util/sqliteVercelBlob.js b/util/sqliteVercelBlob.js index 938947956..2285784a0 100644 --- a/util/sqliteVercelBlob.js +++ b/util/sqliteVercelBlob.js @@ -1,16 +1,16 @@ const sqlite3 = require('sqlite3').verbose(); const fs = require('fs').promises; const fsSync = require('fs'); +const { randomUUID } = require('crypto'); const { Readable } = require('node:stream'); const { pipeline } = require('node:stream/promises'); const { get, put, BlobPreconditionFailedError, BlobNotFoundError } = require('@vercel/blob'); const ETAG_CACHE = '/tmp/etag-vercel-blob.txt'; -let sqliteFilePath = '/tmp/wp-sqlite-s3.sqlite'; +const CACHE_FILE = '/tmp/wp-sqlite-cache.sqlite'; +const CONTEXT_KEY = Symbol.for('serverlesswp.sqliteVercelBlob.context'); let init = false; -let db; -let dataVersion; let _config; exports.name = 'ServerlessWP sqlite Vercel Blob'; @@ -24,13 +24,35 @@ exports.preRequest = async function(event) { throw new Error("Vercel Blob pathname is required"); } + const workingFileName = 'wp-sqlite-' + randomUUID() + '.sqlite'; + const ctx = { + workingPath: '/tmp/' + workingFileName, + db: null, + dataVersion: null, + }; + event[CONTEXT_KEY] = ctx; + + // Tell PHP (wp-config.php) which DB file to open for this request. + // Strip any inbound variant first so a client can't point WP at the + // cache file or another request's working file. wp-config.php also + // passes the value through basename() defensively. + if (!event.headers) event.headers = {}; + for (const k of Object.keys(event.headers)) { + if (k.toLowerCase() === 'x-serverlesswp-sqlite-file') { + delete event.headers[k]; + } + } + event.headers['x-serverlesswp-sqlite-file'] = workingFileName; + const cachedEtag = await getEtag(); const options = { access: 'private' }; if (_config.token) { options.token = _config.token; } - if (cachedEtag) { + // Only send ifNoneMatch if we actually have the cache file locally. + // Otherwise a 304 leaves us with no file to copy. + if (cachedEtag && await exists(CACHE_FILE)) { options.ifNoneMatch = cachedEtag; } @@ -43,19 +65,17 @@ exports.preRequest = async function(event) { } if (response.statusCode === 304) { - // No need to download, just use existing local file. - db = new sqlite3.Database(sqliteFilePath); - dataVersion = await getDataVersion(); - return; + // Cache is up to date; fall through to copy below. } - - if (response.statusCode === 200 && response.stream) { + else if (response.statusCode === 200 && response.stream) { + // Stream to a tmp path then atomically rename into place. + // Existing open fds against the old inode keep working. + const tmp = CACHE_FILE + '.' + randomUUID() + '.tmp'; await pipeline( Readable.fromWeb(response.stream), - fsSync.createWriteStream(sqliteFilePath) + fsSync.createWriteStream(tmp) ); - db = new sqlite3.Database(sqliteFilePath); - dataVersion = await getDataVersion(); + await fs.rename(tmp, CACHE_FILE); if (response.blob?.etag) { await setEtag(response.blob.etag); } @@ -67,71 +87,102 @@ exports.preRequest = async function(event) { return; } console.error('Error fetching database blob:', err); + return; + } + + // If we have a cache file (from this request or a previous one), copy it + // to a per-invocation working file and open SQLite against that copy. + // This isolates concurrent requests on the same warm instance. + if (await exists(CACHE_FILE)) { + await fs.copyFile(CACHE_FILE, ctx.workingPath); + ctx.db = new sqlite3.Database(ctx.workingPath); + ctx.dataVersion = await getDataVersion(ctx.db); } } exports.postRequest = async function(event, response) { + const ctx = event[CONTEXT_KEY]; + if (!ctx) { + return; + } + try { - const dbExists = await exists(sqliteFilePath); - if (!db) { - if (dbExists) { - db = new sqlite3.Database(sqliteFilePath); - dataVersion = null; + // If db wasn't initialized but the working file somehow exists, treat + // it as a new database (e.g. fresh install path). + const workingExists = await exists(ctx.workingPath); + if (!ctx.db) { + if (workingExists) { + ctx.db = new sqlite3.Database(ctx.workingPath); + ctx.dataVersion = null; } else { return; } } - const versionNow = await getDataVersion(); + + const versionNow = await getDataVersion(ctx.db); // See if the db has been mutated, if so, send the changes to the blob store. const readOnly = process.env['SERVERLESSWP_READ_ONLY_MODE']; const readOnlyActive = readOnly && !['false', '0', 'no'].includes(readOnly.toLowerCase()); - if (!readOnlyActive && dataVersion !== versionNow) { - if (dbExists) { - try { - await dbClose(); - - const sqliteContent = await fs.readFile(sqliteFilePath); - const currentEtag = await getEtag(); - - const putOptions = { - access: 'private', - allowOverwrite: true, - addRandomSuffix: false, - }; - if (_config.token) { - putOptions.token = _config.token; - } - if (currentEtag) { - putOptions.ifMatch = currentEtag; - } - - const putResponse = await put(_config.pathname, sqliteContent, putOptions); - if (putResponse?.etag) { - await setEtag(putResponse.etag); - } - return; + if (!readOnlyActive && ctx.dataVersion !== versionNow && workingExists) { + try { + await dbClose(ctx.db); + ctx.db = null; + + const sqliteContent = await fs.readFile(ctx.workingPath); + const currentEtag = await getEtag(); + + const putOptions = { + access: 'private', + allowOverwrite: true, + addRandomSuffix: false, + }; + if (_config.token) { + putOptions.token = _config.token; + } + if (currentEtag) { + putOptions.ifMatch = currentEtag; } - catch (err) { - console.log(err); - const errResponse = { - statusCode: 500, - body: 'Database error. This can happen when simultaneous database updates happen. Re-try your request.' - } - if (err instanceof BlobPreconditionFailedError) { - errResponse.retry = true; - console.log('Retrying database save to Vercel Blob because of a conflicting update.'); - } - return errResponse; + + const putResponse = await put(_config.pathname, sqliteContent, putOptions); + + // Refresh the local cache before writing the ETag so etag.txt + // never describes content newer than CACHE_FILE. If the copy + // fails, the old ETag stays on disk and the next request's + // ifNoneMatch will miss, triggering a clean re-fetch. + const tmp = CACHE_FILE + '.' + randomUUID() + '.tmp'; + await fs.copyFile(ctx.workingPath, tmp); + await fs.rename(tmp, CACHE_FILE); + if (putResponse?.etag) { + await setEtag(putResponse.etag); } + return; + } + catch (err) { + console.log(err); + const errResponse = { + statusCode: 500, + body: 'Database error. This can happen when simultaneous database updates happen. Re-try your request.' + } + if (err instanceof BlobPreconditionFailedError) { + errResponse.retry = true; + console.log('Retrying database save to Vercel Blob because of a conflicting update.'); + } + return errResponse; } } - - await dbClose(); } catch (err) { console.log(err); } + finally { + if (ctx.db) { + try { await dbClose(ctx.db); } catch (e) { /* swallow */ } + ctx.db = null; + } + try { await fs.unlink(ctx.workingPath); } catch (e) { /* file may not exist */ } + delete event[CONTEXT_KEY]; + } } async function getEtag() { @@ -146,9 +197,9 @@ async function setEtag(newEtag) { await fs.writeFile(ETAG_CACHE, newEtag); } -async function getDataVersion() { +async function getDataVersion(db) { return new Promise((resolve, reject) => { - if (!db) { reject('No db') } + if (!db) { return reject('No db') } try { db.get("PRAGMA data_version", (err, row) => { if (err) { @@ -164,9 +215,9 @@ async function getDataVersion() { }); } -async function dbClose() { +async function dbClose(db) { return new Promise((resolve, reject) => { - if (!db) { reject('No db') } + if (!db) { return reject('No db') } try { db.close((closeErr) => { if (closeErr) { diff --git a/wp/wp-config.php b/wp/wp-config.php index 0e7faa018..a23c83596 100644 --- a/wp/wp-config.php +++ b/wp/wp-config.php @@ -132,9 +132,20 @@ // If using SQLite + S3 or SQLite + Vercel Blob instead of MySQL/MariaDB. if (isset($_ENV['SQLITE_S3_BUCKET']) || isset($_ENV['SERVERLESSWP_DATA_SECRET']) || isset($_ENV['BLOB_READ_WRITE_TOKEN'])) { define('DB_DIR', '/tmp'); - define('DB_FILE', 'wp-sqlite-s3.sqlite'); + // Per-invocation working file path is supplied by the Node sqlite plugin + // via a request header so concurrent requests on the same warm instance + // don't share one file. Falls back to a fixed name if the header is missing. + if (!empty($_SERVER['HTTP_X_SERVERLESSWP_SQLITE_FILE'])) { + define('DB_FILE', basename($_SERVER['HTTP_X_SERVERLESSWP_SQLITE_FILE'])); + } else { + define('DB_FILE', 'wp-sqlite-s3.sqlite'); + } define('DB_NAME', 'wp-sqlite'); + // Force the rollback journal mode. The Node sqlite plugin uploads the + // single .sqlite file to remote storage at the end of each request. + define('SQLITE_JOURNAL_MODE', 'DELETE'); + // Auto-cron can cause db race conditions on these urls, don't bother with it. if (strpos($_SERVER['REQUEST_URI'], 'wp-admin') !== false || strpos($_SERVER['REQUEST_URI'], 'wp-login') !== false) { define('DISABLE_WP_CRON', true);