From 7797e179e43e71954387958405f614b46e1e2a41 Mon Sep 17 00:00:00 2001 From: kristofr Date: Wed, 6 May 2026 21:38:17 +0200 Subject: [PATCH 01/11] Add CRC-64/ECMA-182 transactional checksum support for StageBlock --- src/blob/handlers/BlockBlobHandler.ts | 18 ++++- .../StrictModelMiddlewareFactory.ts | 1 - src/common/utils/utils.ts | 81 +++++++++++++++++++ tests/blob/apis/blockblob.test.ts | 73 ++++++++++++++++- tests/blob/utils.test.ts | 62 +++++++++++++- 5 files changed, 230 insertions(+), 5 deletions(-) diff --git a/src/blob/handlers/BlockBlobHandler.ts b/src/blob/handlers/BlockBlobHandler.ts index ec9b11a12..5dbe0154f 100644 --- a/src/blob/handlers/BlockBlobHandler.ts +++ b/src/blob/handlers/BlockBlobHandler.ts @@ -1,5 +1,6 @@ import { convertRawHeadersToMetadata } from "../../common/utils/utils"; import { + computeTransactionalChecksums, getMD5FromStream, getMD5FromString, newEtag @@ -187,6 +188,7 @@ export default class BlockBlobHandler ? options.transactionalContentMD5 || context.request!.getHeader("content-md5") : undefined; + const contentCRC64 = options.transactionalContentCrc64; this.validateBlockId(blockId, blobCtx); @@ -208,12 +210,14 @@ export default class BlockBlobHandler ); } - // Calculate MD5 for validation + // Compute MD5 and CRC64 in a single pass over the stored extent const stream = await this.extentStore.readExtent( persistency, context.contextId ); - const calculatedContentMD5 = await getMD5FromStream(stream); + const { md5: calculatedContentMD5, crc64: calculatedCRC64 } = + await computeTransactionalChecksums(stream); + if (contentMD5 !== undefined) { if (typeof contentMD5 === "string") { const calculatedContentMD5String = Buffer.from( @@ -235,6 +239,15 @@ export default class BlockBlobHandler } } + if (contentCRC64 !== undefined) { + if (!Buffer.from(contentCRC64).equals(Buffer.from(calculatedCRC64))) { + throw StorageErrorFactory.getInvalidOperation( + context.contextId!, + "Provided transactional CRC64 doesn't match." 
+ ); + } + } + const block: BlockModel = { accountName, containerName, @@ -255,6 +268,7 @@ export default class BlockBlobHandler const response: Models.BlockBlobStageBlockResponse = { statusCode: 201, contentMD5: undefined, // TODO: Block content MD5 + xMsContentCrc64: contentCRC64 !== undefined ? calculatedCRC64 : undefined, requestId: blobCtx.contextId, version: BLOB_API_VERSION, date, diff --git a/src/blob/middlewares/StrictModelMiddlewareFactory.ts b/src/blob/middlewares/StrictModelMiddlewareFactory.ts index 403293282..70565a521 100644 --- a/src/blob/middlewares/StrictModelMiddlewareFactory.ts +++ b/src/blob/middlewares/StrictModelMiddlewareFactory.ts @@ -20,7 +20,6 @@ export const UnsupportedHeadersBlocker: StrictModelRequestValidator = async ( logger: ILogger ): Promise => { const UnsupportedHeaderKeys = [ - HeaderConstants.X_MS_CONTENT_CRC64, HeaderConstants.X_MS_RANGE_GET_CONTENT_CRC64, HeaderConstants.X_MS_ENCRYPTION_KEY, HeaderConstants.X_MS_ENCRYPTION_KEY_SHA256, diff --git a/src/common/utils/utils.ts b/src/common/utils/utils.ts index 3debf4f56..9405b42b1 100644 --- a/src/common/utils/utils.ts +++ b/src/common/utils/utils.ts @@ -169,3 +169,84 @@ export async function getMD5FromStream( }); }); } + +// CRC-64/ECMA-182 implementation for Azure Storage transactional integrity checks. 
+// Algorithm and lookup-table approach adapted from the Azure Storage JavaScript SDK (MIT License): +// https://github.com/Azure/azure-sdk-for-js/blob/main/sdk/storage/storage-blob/src/utils/crc64.ts +// Polynomial: 0x42F0E1EBA9EA3693 (ECMA-182 standard, unreflected, init=0, xorout=0) +const CRC64_POLY = 0x42f0e1eba9ea3693n; + +const CRC64_TABLE: readonly bigint[] = (() => { + const table: bigint[] = new Array(256); + for (let i = 0; i < 256; i++) { + let crc = BigInt(i) << 56n; + for (let j = 0; j < 8; j++) { + if ((crc & 0x8000000000000000n) !== 0n) { + crc = ((crc << 1n) ^ CRC64_POLY) & 0xffffffffffffffffn; + } else { + crc = (crc << 1n) & 0xffffffffffffffffn; + } + } + table[i] = crc; + } + return table; +})(); + +function crc64Accumulate(crc: bigint, chunk: Uint8Array): bigint { + for (let i = 0; i < chunk.length; i++) { + const index = Number((crc >> 56n) ^ BigInt(chunk[i])) & 0xff; + crc = ((crc << 8n) ^ CRC64_TABLE[index]) & 0xffffffffffffffffn; + } + return crc; +} + +function bigintToUint8Array(n: bigint): Uint8Array { + const buf = Buffer.allocUnsafe(8); + buf.writeUInt32BE(Number(n >> 32n) >>> 0, 0); + buf.writeUInt32BE(Number(n & 0xffffffffn) >>> 0, 4); + return buf; +} + +export function getCRC64FromString(text: string): Uint8Array { + return bigintToUint8Array(crc64Accumulate(0n, Buffer.from(text))); +} + +export async function getCRC64FromStream( + stream: NodeJS.ReadableStream +): Promise { + return new Promise((resolve, reject) => { + let crc = 0n; + stream + .on("data", (chunk: Buffer | string) => { + const data = Buffer.isBuffer(chunk) ? chunk : Buffer.from(chunk as string); + crc = crc64Accumulate(crc, data); + }) + .on("end", () => { + resolve(bigintToUint8Array(crc)); + }) + .on("error", reject); + }); +} + +/** + * Computes MD5 and CRC-64/ECMA-182 in a single stream pass, avoiding + * reading the extent twice when both checksums may be needed. 
+ */ +export async function computeTransactionalChecksums( + stream: NodeJS.ReadableStream +): Promise<{ md5: Uint8Array; crc64: Uint8Array }> { + const hash = createHash("md5"); + return new Promise((resolve, reject) => { + let crc = 0n; + stream + .on("data", (chunk: Buffer | string) => { + const data = Buffer.isBuffer(chunk) ? chunk : Buffer.from(chunk as string); + hash.update(data); + crc = crc64Accumulate(crc, data); + }) + .on("end", () => { + resolve({ md5: hash.digest(), crc64: bigintToUint8Array(crc) }); + }) + .on("error", reject); + }); +} diff --git a/tests/blob/apis/blockblob.test.ts b/tests/blob/apis/blockblob.test.ts index 0f03201cd..1376475fc 100644 --- a/tests/blob/apis/blockblob.test.ts +++ b/tests/blob/apis/blockblob.test.ts @@ -18,7 +18,10 @@ import { getUniqueName, sleep } from "../../testutils"; -import { getMD5FromString } from "../../../src/common/utils/utils"; +import { + getCRC64FromString, + getMD5FromString +} from "../../../src/common/utils/utils"; // Set true to enable debug log configLogger(false); @@ -316,6 +319,74 @@ describe("BlockBlobAPIs", () => { assert.equal(listResponse.uncommittedBlocks![0].size, body.length); }); + it("stageBlock with correct crc64 should succeed @loki @sql", async () => { + const body = "HelloWorld"; + const crc64 = getCRC64FromString(body); + const options = { transactionalContentCrc64: new Uint8Array(crc64) }; + + const result = await blockBlobClient.stageBlock( + base64encode("1"), + body, + body.length, + options + ); + + assert.equal(result._response.status, 201); + // Server must echo back the CRC64 it validated against + assert.ok( + result.xMsContentCrc64 !== undefined, + "Response should include x-ms-content-crc64" + ); + assert.deepStrictEqual( + Buffer.from(result.xMsContentCrc64!), + Buffer.from(crc64), + "Echoed CRC64 must match what was sent" + ); + + const listResponse = await blockBlobClient.getBlockList("uncommitted"); + assert.equal(listResponse.uncommittedBlocks!.length, 1); + 
assert.equal(listResponse.uncommittedBlocks![0].name, base64encode("1")); + assert.equal(listResponse.uncommittedBlocks![0].size, body.length); + }); + + it("stageBlock with wrong body should throw crc64 mismatch @loki @sql", async () => { + const body = "HelloWorld"; + // Provide CRC64 of a different payload — server must reject the upload + const wrongCrc64 = getCRC64FromString("differentBody"); + const options = { transactionalContentCrc64: new Uint8Array(wrongCrc64) }; + + try { + await blockBlobClient.stageBlock( + base64encode("1"), + body, + body.length, + options + ); + } catch (e) { + assert.equal(e.name, "RestError"); + assert.equal(e.statusCode, 400); + assert.equal( + e.details.message.indexOf("Provided transactional CRC64 doesn't match."), + 0 + ); + return; + } + assert.fail("Did not throw an exception."); + }); + + it("stageBlock without crc64 header should not include crc64 in response @loki @sql", async () => { + // When no x-ms-content-crc64 is sent the response must not include one, + // matching the behaviour of the real service. 
+ const body = "HelloWorld"; + const result = await blockBlobClient.stageBlock( + base64encode("1"), + body, + body.length + ); + assert.equal(result._response.status, 201); + assert.strictEqual(result.xMsContentCrc64, undefined); + }); + it("commitBlockList @loki @sql", async () => { const body = "HelloWorld"; await blockBlobClient.stageBlock(base64encode("1"), body, body.length); diff --git a/tests/blob/utils.test.ts b/tests/blob/utils.test.ts index 9ecd72232..6826c6363 100644 --- a/tests/blob/utils.test.ts +++ b/tests/blob/utils.test.ts @@ -1,5 +1,10 @@ import * as assert from "assert"; -import { convertRawHeadersToMetadata } from "../../src/common/utils/utils"; +import { PassThrough } from "stream"; +import { + convertRawHeadersToMetadata, + getCRC64FromStream, + getCRC64FromString +} from "../../src/common/utils/utils"; describe("Utils", () => { it("convertRawHeadersToMetadata should work", () => { @@ -57,3 +62,58 @@ describe("Utils", () => { assert.deepStrictEqual(metadata, undefined); }); }); + +describe("CRC64", () => { + // CRC-64/ECMA-182 check value for "123456789" per the CRC catalogue: + // https://reveng.sourceforge.io/crc-catalogue/all.htm + it("getCRC64FromString matches the standard CRC-64/ECMA-182 check value for '123456789'", () => { + const result = getCRC64FromString("123456789"); + const hex = Buffer.from(result).toString("hex"); + assert.strictEqual(hex, "6c40df5f0b497347"); + }); + + it("getCRC64FromString produces an 8-byte result", () => { + assert.strictEqual(getCRC64FromString("").length, 8); + assert.strictEqual(getCRC64FromString("Hello, World!").length, 8); + }); + + it("getCRC64FromStream matches getCRC64FromString for the same data", async () => { + const data = "The quick brown fox jumps over the lazy dog"; + const fromString = getCRC64FromString(data); + + const stream = new PassThrough(); + stream.end(Buffer.from(data)); + const fromStream = await getCRC64FromStream(stream); + + assert.deepStrictEqual(Buffer.from(fromString), 
Buffer.from(fromStream)); + }); + + it("getCRC64FromStream produces identical results regardless of chunk boundaries", async () => { + // Streaming data split across different chunk sizes must produce the same + // CRC as a single contiguous buffer — chunk boundaries must not affect the result. + const data = Buffer.from("Azure Blob Storage block integrity check"); + const expected = getCRC64FromString(data.toString()); + + // Push as many 3-byte chunks (deliberately misaligned with any word boundary) + const chunked = new PassThrough(); + for (let i = 0; i < data.length; i += 3) { + chunked.push(data.slice(i, i + 3)); + } + chunked.push(null); + const fromChunked = await getCRC64FromStream(chunked); + + assert.deepStrictEqual(Buffer.from(fromChunked), Buffer.from(expected)); + }); + + it("getCRC64FromString produces distinct values for inputs that differ by a single byte", () => { + // Verifies the avalanche property: a one-byte change must alter the checksum. + const base = Buffer.from("block content for crc64 test"); + const mutated = Buffer.from(base); + mutated[mutated.length - 1] ^= 0x01; + + const crc1 = getCRC64FromString(base.toString("latin1")); + const crc2 = getCRC64FromString(mutated.toString("latin1")); + + assert.notDeepStrictEqual(Buffer.from(crc1), Buffer.from(crc2)); + }); +}); From d7157293f32c8e9f20b79209193bdb085e9261d7 Mon Sep 17 00:00:00 2001 From: kristofr Date: Wed, 6 May 2026 21:55:11 +0200 Subject: [PATCH 02/11] updated to remove bignum dep --- src/blob/handlers/BlockBlobHandler.ts | 36 +++++++++++------ src/common/utils/utils.ts | 57 ++++++++++++++++----------- 2 files changed, 60 insertions(+), 33 deletions(-) diff --git a/src/blob/handlers/BlockBlobHandler.ts b/src/blob/handlers/BlockBlobHandler.ts index 5dbe0154f..c56ae3204 100644 --- a/src/blob/handlers/BlockBlobHandler.ts +++ b/src/blob/handlers/BlockBlobHandler.ts @@ -1,6 +1,7 @@ -import { convertRawHeadersToMetadata } from "../../common/utils/utils"; import { 
computeTransactionalChecksums, + convertRawHeadersToMetadata, + getCRC64FromStream, getMD5FromStream, getMD5FromString, newEtag @@ -210,18 +211,31 @@ export default class BlockBlobHandler ); } - // Compute MD5 and CRC64 in a single pass over the stored extent - const stream = await this.extentStore.readExtent( - persistency, - context.contextId - ); - const { md5: calculatedContentMD5, crc64: calculatedCRC64 } = - await computeTransactionalChecksums(stream); + // Only read the stored extent when at least one transactional checksum was provided. + // Compute only what is needed to avoid unnecessary CPU work. + let calculatedContentMD5: Uint8Array | undefined; + let calculatedCRC64: Uint8Array | undefined; + + if (contentMD5 !== undefined || contentCRC64 !== undefined) { + const stream = await this.extentStore.readExtent( + persistency, + context.contextId + ); + if (contentMD5 !== undefined && contentCRC64 !== undefined) { + const result = await computeTransactionalChecksums(stream); + calculatedContentMD5 = result.md5; + calculatedCRC64 = result.crc64; + } else if (contentMD5 !== undefined) { + calculatedContentMD5 = await getMD5FromStream(stream); + } else { + calculatedCRC64 = await getCRC64FromStream(stream); + } + } if (contentMD5 !== undefined) { if (typeof contentMD5 === "string") { const calculatedContentMD5String = Buffer.from( - calculatedContentMD5 + calculatedContentMD5! ).toString("base64"); if (contentMD5 !== calculatedContentMD5String) { throw StorageErrorFactory.getInvalidOperation( @@ -230,7 +244,7 @@ export default class BlockBlobHandler ); } } else { - if (!Buffer.from(contentMD5).equals(calculatedContentMD5)) { + if (!Buffer.from(contentMD5).equals(calculatedContentMD5!)) { throw StorageErrorFactory.getInvalidOperation( context.contextId!, "Provided contentMD5 doesn't match." 
@@ -240,7 +254,7 @@ export default class BlockBlobHandler } if (contentCRC64 !== undefined) { - if (!Buffer.from(contentCRC64).equals(Buffer.from(calculatedCRC64))) { + if (!Buffer.from(contentCRC64).equals(Buffer.from(calculatedCRC64!))) { throw StorageErrorFactory.getInvalidOperation( context.contextId!, "Provided transactional CRC64 doesn't match." diff --git a/src/common/utils/utils.ts b/src/common/utils/utils.ts index 9405b42b1..7450e1979 100644 --- a/src/common/utils/utils.ts +++ b/src/common/utils/utils.ts @@ -174,55 +174,68 @@ export async function getMD5FromStream( // Algorithm and lookup-table approach adapted from the Azure Storage JavaScript SDK (MIT License): // https://github.com/Azure/azure-sdk-for-js/blob/main/sdk/storage/storage-blob/src/utils/crc64.ts // Polynomial: 0x42F0E1EBA9EA3693 (ECMA-182 standard, unreflected, init=0, xorout=0) -const CRC64_POLY = 0x42f0e1eba9ea3693n; +// Represented as two 32-bit halves (hi, lo) to avoid BigInt. +const CRC64_POLY_HI = 0x42f0e1eb; +const CRC64_POLY_LO = 0xa9ea3693; -const CRC64_TABLE: readonly bigint[] = (() => { - const table: bigint[] = new Array(256); +// Flat table: entry i occupies [i*2] (hi) and [i*2+1] (lo). 
+const CRC64_TABLE: readonly number[] = (() => { + const table: number[] = new Array(512); for (let i = 0; i < 256; i++) { - let crc = BigInt(i) << 56n; + let hi = (i << 24) >>> 0; + let lo = 0; for (let j = 0; j < 8; j++) { - if ((crc & 0x8000000000000000n) !== 0n) { - crc = ((crc << 1n) ^ CRC64_POLY) & 0xffffffffffffffffn; + if ((hi & 0x80000000) !== 0) { + hi = (((hi << 1) | (lo >>> 31)) ^ CRC64_POLY_HI) >>> 0; + lo = ((lo << 1) ^ CRC64_POLY_LO) >>> 0; } else { - crc = (crc << 1n) & 0xffffffffffffffffn; + hi = ((hi << 1) | (lo >>> 31)) >>> 0; + lo = (lo << 1) >>> 0; } } - table[i] = crc; + table[i * 2] = hi; + table[i * 2 + 1] = lo; } return table; })(); -function crc64Accumulate(crc: bigint, chunk: Uint8Array): bigint { +function crc64Accumulate( + crcHi: number, crcLo: number, chunk: Uint8Array +): [number, number] { for (let i = 0; i < chunk.length; i++) { - const index = Number((crc >> 56n) ^ BigInt(chunk[i])) & 0xff; - crc = ((crc << 8n) ^ CRC64_TABLE[index]) & 0xffffffffffffffffn; + const index = ((crcHi >>> 24) ^ chunk[i]) & 0xff; + const tHi = CRC64_TABLE[index * 2]; + const tLo = CRC64_TABLE[index * 2 + 1]; + crcHi = (((crcHi << 8) | (crcLo >>> 24)) ^ tHi) >>> 0; + crcLo = ((crcLo << 8) ^ tLo) >>> 0; } - return crc; + return [crcHi, crcLo]; } -function bigintToUint8Array(n: bigint): Uint8Array { +function crc64ToUint8Array(hi: number, lo: number): Uint8Array { const buf = Buffer.allocUnsafe(8); - buf.writeUInt32BE(Number(n >> 32n) >>> 0, 0); - buf.writeUInt32BE(Number(n & 0xffffffffn) >>> 0, 4); + buf.writeUInt32BE(hi >>> 0, 0); + buf.writeUInt32BE(lo >>> 0, 4); return buf; } export function getCRC64FromString(text: string): Uint8Array { - return bigintToUint8Array(crc64Accumulate(0n, Buffer.from(text))); + const [hi, lo] = crc64Accumulate(0, 0, Buffer.from(text)); + return crc64ToUint8Array(hi, lo); } export async function getCRC64FromStream( stream: NodeJS.ReadableStream ): Promise { return new Promise((resolve, reject) => { - let crc = 0n; + let hi = 
0, lo = 0; stream .on("data", (chunk: Buffer | string) => { const data = Buffer.isBuffer(chunk) ? chunk : Buffer.from(chunk as string); - crc = crc64Accumulate(crc, data); + [hi, lo] = crc64Accumulate(hi, lo, data); }) .on("end", () => { - resolve(bigintToUint8Array(crc)); + resolve(crc64ToUint8Array(hi, lo)); }) .on("error", reject); }); @@ -237,15 +250,15 @@ export async function computeTransactionalChecksums( ): Promise<{ md5: Uint8Array; crc64: Uint8Array }> { const hash = createHash("md5"); return new Promise((resolve, reject) => { - let crc = 0n; + let hi = 0, lo = 0; stream .on("data", (chunk: Buffer | string) => { const data = Buffer.isBuffer(chunk) ? chunk : Buffer.from(chunk as string); hash.update(data); - crc = crc64Accumulate(crc, data); + [hi, lo] = crc64Accumulate(hi, lo, data); }) .on("end", () => { - resolve({ md5: hash.digest(), crc64: bigintToUint8Array(crc) }); + resolve({ md5: hash.digest(), crc64: crc64ToUint8Array(hi, lo) }); }) .on("error", reject); }); From fafdf28585bbb42b9d186c05af6424ad74e4716c Mon Sep 17 00:00:00 2001 From: kristofr Date: Thu, 7 May 2026 17:16:08 +0200 Subject: [PATCH 03/11] fix counter generator to avoid flakiness --- tests/testutils.ts | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/tests/testutils.ts b/tests/testutils.ts index 86c214622..fc3966511 100644 --- a/tests/testutils.ts +++ b/tests/testutils.ts @@ -10,9 +10,14 @@ export const EMULATOR_ACCOUNT_NAME = "devstoreaccount1"; export const EMULATOR_ACCOUNT_KEY = "Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw=="; +// Counter-based suffix instead of Math.random() to guarantee uniqueness within +// a test run. Random suffixes can collide when multiple entities are created +// within the same millisecond on fast CI runners, causing flaky batch tests. 
+let _uniqueNameCounter = 0; + export function getUniqueName(prefix: string): string { return `${prefix}${new Date().getTime()}${padStart( - Math.floor(Math.random() * 10000).toString(), + (++_uniqueNameCounter).toString(), 5, "00000" )}`; From d9bffe8d601c76e8f5d4fb0c7e2a58a8ba78b441 Mon Sep 17 00:00:00 2001 From: kristofr Date: Mon, 11 May 2026 15:03:28 +0200 Subject: [PATCH 04/11] added option to test test cases against real azure to confirm behavior. Also added logic to crc PutBlock and AppendBlock --- src/blob/errors/StorageErrorFactory.ts | 37 +++++++ src/blob/handlers/AppendBlobHandler.ts | 44 +++----- src/blob/handlers/BlobHandler.ts | 3 + src/blob/handlers/BlockBlobHandler.ts | 103 +++++------------ src/blob/utils/utils.ts | 55 +++++++++ src/common/utils/utils.ts | 95 +++++++++++----- tests/BlobTestServerFactory.ts | 16 +++ tests/blob/apis/appendblob.test.ts | 103 ++++++++++++++++- tests/blob/apis/blob.test.ts | 56 +++++++++- tests/blob/apis/blobbatch.test.ts | 4 +- tests/blob/apis/blockblob.test.ts | 148 ++++++++++++++++++++++--- tests/blob/apis/container.test.ts | 3 +- tests/blob/apis/pageblob.test.ts | 3 +- tests/blob/blockblob.highlevel.test.ts | 3 +- tests/blob/utils.test.ts | 9 +- tests/testutils.ts | 93 +++++++++++++++- 16 files changed, 616 insertions(+), 159 deletions(-) diff --git a/src/blob/errors/StorageErrorFactory.ts b/src/blob/errors/StorageErrorFactory.ts index c44776fb5..a0c4e897b 100644 --- a/src/blob/errors/StorageErrorFactory.ts +++ b/src/blob/errors/StorageErrorFactory.ts @@ -190,6 +190,43 @@ export default class StorageErrorFactory { ); } + public static getCrc64Mismatch( + contextID: string = DefaultID, + userSpecifiedCrc64: string, + serverCalculatedCrc64: string + ): StorageError { + return new StorageError( + 400, + "Crc64Mismatch", + "The CRC64 value specified in the request did not match with the CRC64 value calculated by the server.", + contextID, + { + UserSpecifiedCrc64: userSpecifiedCrc64, + ServerCalculatedCrc64: 
serverCalculatedCrc64 + } + ); + } + + public static getBothCrc64AndMd5HeaderPresent( + contextID: string = DefaultID + ): StorageError { + return new StorageError( + 400, + "BothCrc64AndMd5HeaderPresent", + "Both x-ms-content-crc64 header and Content-MD5 header are present.", + contextID + ); + } + + public static getInvalidMd5(contextID: string = DefaultID): StorageError { + return new StorageError( + 400, + "InvalidMd5", + "The MD5 value specified in the request is invalid. The MD5 value must be 128 bits and Base64-encoded.", + contextID + ); + } + public static getInvalidPageRange(contextID: string): StorageError { return new StorageError( 416, diff --git a/src/blob/handlers/AppendBlobHandler.ts b/src/blob/handlers/AppendBlobHandler.ts index 99bc462b5..dd34ce75f 100644 --- a/src/blob/handlers/AppendBlobHandler.ts +++ b/src/blob/handlers/AppendBlobHandler.ts @@ -1,5 +1,7 @@ -import { convertRawHeadersToMetadata } from "../../common/utils/utils"; -import { getMD5FromStream, newEtag } from "../../common/utils/utils"; +import { + convertRawHeadersToMetadata, + newEtag +} from "../../common/utils/utils"; import BlobStorageContext from "../context/BlobStorageContext"; import NotImplementedError from "../errors/NotImplementedError"; import StorageErrorFactory from "../errors/StorageErrorFactory"; @@ -13,7 +15,7 @@ import { MAX_APPEND_BLOB_BLOCK_COUNT, MAX_APPEND_BLOB_BLOCK_SIZE } from "../utils/constants"; -import { getTagsFromString } from "../utils/utils"; +import { computeAndValidateTransactionalChecksums, getTagsFromString } from "../utils/utils"; import BaseHandler from "./BaseHandler"; export default class AppendBlobHandler extends BaseHandler @@ -149,38 +151,28 @@ export default class AppendBlobHandler extends BaseHandler ); } - // MD5 + // MD5 and/or CRC64 transactional integrity validation const contentMD5 = blobCtx.request!.getHeader(HeaderConstants.CONTENT_MD5); + const contentCRC64 = options.transactionalContentCrc64; let contentMD5Buffer; - let 
contentMD5String; if (contentMD5 !== undefined) { contentMD5Buffer = typeof contentMD5 === "string" ? Buffer.from(contentMD5, "base64") : contentMD5; - contentMD5String = - typeof contentMD5 === "string" - ? contentMD5 - : contentMD5Buffer.toString("base64"); + } - const stream = await this.extentStore.readExtent( - extent, - blobCtx.contextId + // Per the Append Block REST contract, the service always computes a CRC64 + // of the appended block and returns it in x-ms-content-crc64. + const stream = await this.extentStore.readExtent(extent, blobCtx.contextId); + const { crc64: calculatedCRC64 } = + await computeAndValidateTransactionalChecksums( + stream, + { md5: contentMD5, crc64: contentCRC64 }, + context.contextId, + { crc64: true } ); - const calculatedContentMD5Buffer = await getMD5FromStream(stream); - const calculatedContentMD5String = Buffer.from( - calculatedContentMD5Buffer - ).toString("base64"); - - if (contentMD5String !== calculatedContentMD5String) { - throw StorageErrorFactory.getMd5Mismatch( - context.contextId, - contentMD5String, - calculatedContentMD5String - ); - } - } const originOffset = blob.properties.contentLength; @@ -206,7 +198,7 @@ export default class AppendBlobHandler extends BaseHandler eTag: properties.etag, lastModified: properties.lastModified, contentMD5: contentMD5Buffer, - xMsContentCrc64: undefined, + xMsContentCrc64: calculatedCRC64, clientRequestId: options.requestId, version: BLOB_API_VERSION, date, diff --git a/src/blob/handlers/BlobHandler.ts b/src/blob/handlers/BlobHandler.ts index 0ab7be045..1ee4b9bc6 100644 --- a/src/blob/handlers/BlobHandler.ts +++ b/src/blob/handlers/BlobHandler.ts @@ -907,6 +907,9 @@ export default class BlobHandler extends BaseHandler implements IBlobHandler { date: context.startTime, copyId: res.copyId, copyStatus, + // Per the Copy Blob From URL REST contract, echo the source's Content-MD5 + // back to the client when it was supplied in x-ms-source-content-md5. 
+ contentMD5: options.sourceContentMD5, clientRequestId: options.requestId }; diff --git a/src/blob/handlers/BlockBlobHandler.ts b/src/blob/handlers/BlockBlobHandler.ts index c56ae3204..f5cdab2eb 100644 --- a/src/blob/handlers/BlockBlobHandler.ts +++ b/src/blob/handlers/BlockBlobHandler.ts @@ -1,8 +1,5 @@ import { - computeTransactionalChecksums, convertRawHeadersToMetadata, - getCRC64FromStream, - getMD5FromStream, getMD5FromString, newEtag } from "../../common/utils/utils"; @@ -16,7 +13,7 @@ import { parseXML } from "../generated/utils/xml"; import { BlobModel, BlockModel } from "../persistence/IBlobMetadataStore"; import { BLOB_API_VERSION } from "../utils/constants"; import BaseHandler from "./BaseHandler"; -import { getTagsFromString } from "../utils/utils"; +import { computeAndValidateTransactionalChecksums, getTagsFromString } from "../utils/utils"; /** * BlobHandler handles Azure Storage BlockBlob related requests. @@ -52,6 +49,7 @@ export default class BlockBlobHandler ? options.blobHTTPHeaders.blobContentMD5 || context.request!.getHeader("content-md5") : undefined; + const contentCRC64 = options.transactionalContentCrc64; await this.metadataStore.checkContainerExist( context, @@ -70,32 +68,19 @@ export default class BlockBlobHandler ); } - // Calculate MD5 for validation + // MD5 is always needed (persisted as the blob's contentMD5 property); + // CRC64 is computed in the same pass only when the client supplied one. const stream = await this.extentStore.readExtent( persistency, context.contextId ); - const calculatedContentMD5 = await getMD5FromStream(stream); - if (contentMD5 !== undefined) { - if (typeof contentMD5 === "string") { - const calculatedContentMD5String = Buffer.from( - calculatedContentMD5 - ).toString("base64"); - if (contentMD5 !== calculatedContentMD5String) { - throw StorageErrorFactory.getInvalidOperation( - context.contextId!, - "Provided contentMD5 doesn't match." 
- ); - } - } else { - if (!Buffer.from(contentMD5).equals(calculatedContentMD5)) { - throw StorageErrorFactory.getInvalidOperation( - context.contextId!, - "Provided contentMD5 doesn't match." - ); - } - } - } + const { md5: calculatedContentMD5 } = + await computeAndValidateTransactionalChecksums( + stream, + { md5: contentMD5, crc64: contentCRC64 }, + context.contextId, + { md5: true } + ); const blob: BlobModel = { deleted: false, @@ -211,56 +196,22 @@ export default class BlockBlobHandler ); } - // Only read the stored extent when at least one transactional checksum was provided. - // Compute only what is needed to avoid unnecessary CPU work. - let calculatedContentMD5: Uint8Array | undefined; - let calculatedCRC64: Uint8Array | undefined; - - if (contentMD5 !== undefined || contentCRC64 !== undefined) { - const stream = await this.extentStore.readExtent( - persistency, - context.contextId + // Per the Put Block REST contract, the service computes a CRC64 of the + // staged block and echoes it back in x-ms-content-crc64 unless the client + // supplied a Content-MD5 (Azure rejects supplying both). Compute CRC64 + // whenever no MD5 was supplied, regardless of whether the client supplied + // a CRC64 themselves. 
+ const stream = await this.extentStore.readExtent( + persistency, + context.contextId + ); + const { crc64: calculatedCRC64 } = + await computeAndValidateTransactionalChecksums( + stream, + { md5: contentMD5, crc64: contentCRC64 }, + context.contextId, + { crc64: contentMD5 === undefined } ); - if (contentMD5 !== undefined && contentCRC64 !== undefined) { - const result = await computeTransactionalChecksums(stream); - calculatedContentMD5 = result.md5; - calculatedCRC64 = result.crc64; - } else if (contentMD5 !== undefined) { - calculatedContentMD5 = await getMD5FromStream(stream); - } else { - calculatedCRC64 = await getCRC64FromStream(stream); - } - } - - if (contentMD5 !== undefined) { - if (typeof contentMD5 === "string") { - const calculatedContentMD5String = Buffer.from( - calculatedContentMD5! - ).toString("base64"); - if (contentMD5 !== calculatedContentMD5String) { - throw StorageErrorFactory.getInvalidOperation( - context.contextId!, - "Provided contentMD5 doesn't match." - ); - } - } else { - if (!Buffer.from(contentMD5).equals(calculatedContentMD5!)) { - throw StorageErrorFactory.getInvalidOperation( - context.contextId!, - "Provided contentMD5 doesn't match." - ); - } - } - } - - if (contentCRC64 !== undefined) { - if (!Buffer.from(contentCRC64).equals(Buffer.from(calculatedCRC64!))) { - throw StorageErrorFactory.getInvalidOperation( - context.contextId!, - "Provided transactional CRC64 doesn't match." - ); - } - } const block: BlockModel = { accountName, @@ -282,7 +233,7 @@ export default class BlockBlobHandler const response: Models.BlockBlobStageBlockResponse = { statusCode: 201, contentMD5: undefined, // TODO: Block content MD5 - xMsContentCrc64: contentCRC64 !== undefined ? 
calculatedCRC64 : undefined, + xMsContentCrc64: calculatedCRC64, requestId: blobCtx.contextId, version: BLOB_API_VERSION, date, diff --git a/src/blob/utils/utils.ts b/src/blob/utils/utils.ts index 46b6705fe..9372c1608 100644 --- a/src/blob/utils/utils.ts +++ b/src/blob/utils/utils.ts @@ -4,6 +4,61 @@ import StorageErrorFactory from "../errors/StorageErrorFactory"; import { USERDELEGATIONKEY_BASIC_KEY } from "./constants"; import { BlobTag, BlobTags } from "@azure/storage-blob"; import { TagContent } from "../persistence/QueryInterpreter/QueryNodes/IQueryNode"; +import { computeTransactionalChecksums } from "../../common/utils/utils"; + +/** + * Computes MD5 and/or CRC-64/NVME from a stream in a single pass and validates + * against the request-supplied values. Throws Md5Mismatch / Crc64Mismatch + * (HTTP 400) on mismatch — the documented Azure Storage error codes for + * transactional integrity failures. + * + * Rejects requests that supply both checksums with `BothCrc64AndMd5HeaderPresent` + * (HTTP 400), matching the real Azure service contract. + * + * A checksum is computed when its `expected` value is provided, OR when the + * corresponding `force` flag is set (for callers that need the value for + * non-validation purposes — e.g. Put Blob persists MD5 as a blob property). + */ +export async function computeAndValidateTransactionalChecksums( + stream: NodeJS.ReadableStream, + expected: { md5?: Uint8Array | string; crc64?: Uint8Array }, + contextId: string | undefined, + force?: { md5?: boolean; crc64?: boolean } +): Promise<{ md5?: Uint8Array; crc64?: Uint8Array }> { + if (expected.md5 !== undefined && expected.crc64 !== undefined) { + throw StorageErrorFactory.getBothCrc64AndMd5HeaderPresent(contextId); + } + if (expected.md5 !== undefined) { + const md5Bytes = + typeof expected.md5 === "string" + ? 
Buffer.from(expected.md5, "base64") + : Buffer.from(expected.md5); + if (md5Bytes.length !== 16) { + throw StorageErrorFactory.getInvalidMd5(contextId); + } + } + const calculated = await computeTransactionalChecksums(stream, expected, force); + + if (expected.md5 !== undefined) { + const expectedMd5 = + typeof expected.md5 === "string" + ? expected.md5 + : Buffer.from(expected.md5).toString("base64"); + const calculatedMd5 = Buffer.from(calculated.md5!).toString("base64"); + if (expectedMd5 !== calculatedMd5) { + throw StorageErrorFactory.getMd5Mismatch(contextId, expectedMd5, calculatedMd5); + } + } + if (expected.crc64 !== undefined) { + const expectedCrc64 = Buffer.from(expected.crc64).toString("base64"); + const calculatedCrc64 = Buffer.from(calculated.crc64!).toString("base64"); + if (expectedCrc64 !== calculatedCrc64) { + throw StorageErrorFactory.getCrc64Mismatch(contextId, expectedCrc64, calculatedCrc64); + } + } + + return calculated; +} export function checkApiVersion( inputApiVersion: string, diff --git a/src/common/utils/utils.ts b/src/common/utils/utils.ts index 7450e1979..c0c6fe5b3 100644 --- a/src/common/utils/utils.ts +++ b/src/common/utils/utils.ts @@ -170,27 +170,42 @@ export async function getMD5FromStream( }); } -// CRC-64/ECMA-182 implementation for Azure Storage transactional integrity checks. -// Algorithm and lookup-table approach adapted from the Azure Storage JavaScript SDK (MIT License): -// https://github.com/Azure/azure-sdk-for-js/blob/main/sdk/storage/storage-blob/src/utils/crc64.ts -// Polynomial: 0x42F0E1EBA9EA3693 (ECMA-182 standard, unreflected, init=0, xorout=0) -// Represented as two 32-bit halves (hi, lo) to avoid BigInt. -const CRC64_POLY_HI = 0x42f0e1eb; -const CRC64_POLY_LO = 0xa9ea3693; +// CRC-64/NVME implementation for Azure Storage transactional integrity checks. +// This is the variant the Azure Blob service uses on x-ms-content-crc64; the +// wire format is little-endian (LSB byte first). 
+// +// Parameters: +// width = 64 +// poly = 0xad93d23594c93659 (reflected form: 0x9a6c9329ac4bc9b5) +// init = 0xffffffffffffffff +// refin = true +// refout = true +// xorout = 0xffffffffffffffff +// check = 0xae8b14860a799888 ("123456789") +// +// Represented as two 32-bit halves (hi, lo) so we don't need BigInt — Azurite +// supports Node engines down to 10.0.0 where BigInt isn't reliable. Since this +// is a reflected (right-shift) CRC, `lo` holds the bits that get consumed by +// the next input byte. +const CRC64_POLY_HI = 0x9a6c9329; +const CRC64_POLY_LO = 0xac4bc9b5; // Flat table: entry i occupies [i*2] (hi) and [i*2+1] (lo). const CRC64_TABLE: readonly number[] = (() => { const table: number[] = new Array(512); for (let i = 0; i < 256; i++) { - let hi = (i << 24) >>> 0; - let lo = 0; + let hi = 0; + let lo = i; for (let j = 0; j < 8; j++) { - if ((hi & 0x80000000) !== 0) { - hi = (((hi << 1) | (lo >>> 31)) ^ CRC64_POLY_HI) >>> 0; - lo = ((lo << 1) ^ CRC64_POLY_LO) >>> 0; + const xorPoly = (lo & 1) !== 0; + const newLo = ((hi & 1) << 31) | (lo >>> 1); + const newHi = hi >>> 1; + if (xorPoly) { + hi = (newHi ^ CRC64_POLY_HI) >>> 0; + lo = (newLo ^ CRC64_POLY_LO) >>> 0; } else { - hi = ((hi << 1) | (lo >>> 31)) >>> 0; - lo = (lo << 1) >>> 0; + hi = newHi >>> 0; + lo = newLo >>> 0; } } table[i * 2] = hi; @@ -203,24 +218,31 @@ function crc64Accumulate( crcHi: number, crcLo: number, chunk: Uint8Array ): [number, number] { for (let i = 0; i < chunk.length; i++) { - const index = ((crcHi >>> 24) ^ chunk[i]) & 0xff; + const index = (crcLo ^ chunk[i]) & 0xff; const tHi = CRC64_TABLE[index * 2]; const tLo = CRC64_TABLE[index * 2 + 1]; - crcHi = (((crcHi << 8) | (crcLo >>> 24)) ^ tHi) >>> 0; - crcLo = ((crcLo << 8) ^ tLo) >>> 0; + const newLo = ((crcHi & 0xff) << 24) | (crcLo >>> 8); + const newHi = crcHi >>> 8; + crcHi = (newHi ^ tHi) >>> 0; + crcLo = (newLo ^ tLo) >>> 0; } return [crcHi, crcLo]; } +// Initial CRC state is 0 XOR 0xFFFFFFFFFFFFFFFF = 
0xFFFFFFFF_FFFFFFFF. +const CRC64_INIT_HI = 0xffffffff; +const CRC64_INIT_LO = 0xffffffff; + function crc64ToUint8Array(hi: number, lo: number): Uint8Array { + // Apply xorout (0xFFFFFFFFFFFFFFFF) and serialize little-endian: LSB first. const buf = Buffer.allocUnsafe(8); - buf.writeUInt32BE(hi >>> 0, 0); - buf.writeUInt32BE(lo >>> 0, 4); + buf.writeUInt32LE((lo ^ 0xffffffff) >>> 0, 0); + buf.writeUInt32LE((hi ^ 0xffffffff) >>> 0, 4); return buf; } export function getCRC64FromString(text: string): Uint8Array { - const [hi, lo] = crc64Accumulate(0, 0, Buffer.from(text)); + const [hi, lo] = crc64Accumulate(CRC64_INIT_HI, CRC64_INIT_LO, Buffer.from(text)); return crc64ToUint8Array(hi, lo); } @@ -228,7 +250,7 @@ export async function getCRC64FromStream( stream: NodeJS.ReadableStream ): Promise { return new Promise((resolve, reject) => { - let hi = 0, lo = 0; + let hi = CRC64_INIT_HI, lo = CRC64_INIT_LO; stream .on("data", (chunk: Buffer | string) => { const data = Buffer.isBuffer(chunk) ? chunk : Buffer.from(chunk as string); @@ -242,23 +264,36 @@ export async function getCRC64FromStream( } /** - * Computes MD5 and CRC-64/ECMA-182 in a single stream pass, avoiding - * reading the extent twice when both checksums may be needed. + * Computes MD5 and/or CRC-64/NVME in a single stream pass. A checksum is + * computed when the corresponding `expected` value is provided OR when `force` + * is set for that field. The other is returned as undefined. + * + * `expected` is the caller's request-supplied value (only its presence matters + * here; comparison happens at the caller). `force` is for callers that need a + * checksum for purposes other than validation — e.g. Put Blob always needs MD5 + * because it's persisted as the blob's contentMD5 property. 
*/ export async function computeTransactionalChecksums( - stream: NodeJS.ReadableStream -): Promise<{ md5: Uint8Array; crc64: Uint8Array }> { - const hash = createHash("md5"); + stream: NodeJS.ReadableStream, + expected: { md5?: Uint8Array | string; crc64?: Uint8Array }, + force?: { md5?: boolean; crc64?: boolean } +): Promise<{ md5?: Uint8Array; crc64?: Uint8Array }> { + const needMd5 = expected.md5 !== undefined || !!force?.md5; + const needCrc64 = expected.crc64 !== undefined || !!force?.crc64; + const hash = needMd5 ? createHash("md5") : undefined; return new Promise((resolve, reject) => { - let hi = 0, lo = 0; + let hi = CRC64_INIT_HI, lo = CRC64_INIT_LO; stream .on("data", (chunk: Buffer | string) => { const data = Buffer.isBuffer(chunk) ? chunk : Buffer.from(chunk as string); - hash.update(data); - [hi, lo] = crc64Accumulate(hi, lo, data); + if (hash) hash.update(data); + if (needCrc64) [hi, lo] = crc64Accumulate(hi, lo, data); }) .on("end", () => { - resolve({ md5: hash.digest(), crc64: crc64ToUint8Array(hi, lo) }); + resolve({ + md5: hash ? hash.digest() : undefined, + crc64: needCrc64 ? crc64ToUint8Array(hi, lo) : undefined, + }); }) .on("error", reject); }); diff --git a/tests/BlobTestServerFactory.ts b/tests/BlobTestServerFactory.ts index d8c4311cf..79f82da2a 100644 --- a/tests/BlobTestServerFactory.ts +++ b/tests/BlobTestServerFactory.ts @@ -5,6 +5,19 @@ import SqlBlobServer from "../src/blob/SqlBlobServer"; import { StoreDestinationArray } from "../src/common/persistence/IExtentStore"; import { DEFAULT_SQL_OPTIONS } from "../src/common/utils/constants"; import { DEFAULT_BLOB_KEEP_ALIVE_TIMEOUT } from "../src/blob/utils/constants"; +import { LIVE_TEST_MODE } from "./testutils"; + +/** + * No-op stand-in returned in live mode. Tests call start/close/clean on the + * "server", but in live mode there's no local server to manage — we just need + * an object with a `config` whose host/port the test fixture can read. 
+ */ +class LiveModeStubServer { + public readonly config = { host: "live.azure", port: 443 }; + public async start(): Promise { /* no-op */ } + public async close(): Promise { /* no-op */ } + public async clean(): Promise { /* no-op */ } +} export default class BlobTestServerFactory { public createServer( @@ -13,6 +26,9 @@ export default class BlobTestServerFactory { https: boolean = false, oauth?: string ): BlobServer | SqlBlobServer { + if (LIVE_TEST_MODE) { + return new LiveModeStubServer() as unknown as BlobServer; + } const databaseConnectionString = process.env.AZURITE_TEST_DB; const isSQL = databaseConnectionString !== undefined; const inMemoryPersistence = process.env.AZURITE_TEST_INMEMORYPERSISTENCE !== undefined; diff --git a/tests/blob/apis/appendblob.test.ts b/tests/blob/apis/appendblob.test.ts index 786250927..56b8aac8c 100644 --- a/tests/blob/apis/appendblob.test.ts +++ b/tests/blob/apis/appendblob.test.ts @@ -8,12 +8,17 @@ import * as assert from "assert"; import { BlobType } from "../../../src/blob/generated/artifacts/models"; import { configLogger } from "../../../src/common/Logger"; -import { getMD5FromString } from "../../../src/common/utils/utils"; +import { + getCRC64FromString, + getMD5FromString +} from "../../../src/common/utils/utils"; +import * as crypto from "crypto"; import BlobTestServerFactory from "../../BlobTestServerFactory"; import { bodyToString, EMULATOR_ACCOUNT_KEY, EMULATOR_ACCOUNT_NAME, + getTestServerBaseURL, getUniqueName, sleep } from "../../testutils"; @@ -25,7 +30,7 @@ describe("AppendBlobAPIs", () => { const factory = new BlobTestServerFactory(); const server = factory.createServer(); - const baseURL = `http://${server.config.host}:${server.config.port}/devstoreaccount1`; + const baseURL = getTestServerBaseURL(server); const serviceClient = new BlobServiceClient( baseURL, newPipeline( @@ -451,6 +456,100 @@ describe("AppendBlobAPIs", () => { assert.deepStrictEqual(string, "abcdef123456T@"); }); + it("AppendBlock with 
correct crc64 should succeed and echo crc64 @loki", async () => { + await appendBlobClient.create(); + const body = "HelloWorld"; + const crc64 = getCRC64FromString(body); + + const result = await appendBlobClient.appendBlock(body, body.length, { + transactionalContentCrc64: new Uint8Array(crc64) + }); + + assert.equal(result._response.status, 201); + assert.ok( + result.xMsContentCrc64 !== undefined, + "Response should include x-ms-content-crc64" + ); + assert.deepStrictEqual( + Buffer.from(result.xMsContentCrc64!), + Buffer.from(crc64), + "Echoed CRC64 must match what was sent" + ); + }); + + it("AppendBlock with wrong crc64 should throw mismatch @loki", async () => { + await appendBlobClient.create(); + const body = "HelloWorld"; + const wrongCrc64 = getCRC64FromString("differentBody"); + + try { + await appendBlobClient.appendBlock(body, body.length, { + transactionalContentCrc64: new Uint8Array(wrongCrc64) + }); + } catch (e) { + assert.equal(e.name, "RestError"); + assert.equal(e.statusCode, 400); + assert.equal(e.code, "Crc64Mismatch"); + return; + } + assert.fail("Did not throw an exception."); + }); + + it("AppendBlock with wrong md5 should throw mismatch @loki", async () => { + await appendBlobClient.create(); + const body = "HelloWorld"; + const wrongMd5 = crypto.createHash("md5").update("differentBody", "utf8").digest(); + + try { + await appendBlobClient.appendBlock(body, body.length, { + transactionalContentMD5: new Uint8Array(wrongMd5) + }); + } catch (e) { + assert.equal(e.name, "RestError"); + assert.equal(e.statusCode, 400); + assert.equal(e.code, "Md5Mismatch"); + return; + } + assert.fail("Did not throw an exception."); + }); + + it("AppendBlock without any checksum header should still echo computed crc64 @loki", async () => { + // Per the Append Block REST contract, the service always computes a CRC64 + // of the appended block and returns it in x-ms-content-crc64, even when + // the client didn't supply one. 
The echoed value must match the canonical + // CRC-64/NVME. + await appendBlobClient.create(); + const body = "HelloWorld"; + const result = await appendBlobClient.appendBlock(body, body.length); + assert.equal(result._response.status, 201); + assert.deepStrictEqual( + Buffer.from(result.xMsContentCrc64!), + Buffer.from(getCRC64FromString(body)) + ); + }); + + it("AppendBlock with both md5 and crc64 supplied should be rejected @loki", async () => { + // Real Azure rejects requests that supply both Content-MD5 and + // x-ms-content-crc64 — Azurite must match. + await appendBlobClient.create(); + const body = "HelloWorld"; + const md5 = crypto.createHash("md5").update(body, "utf8").digest(); + const crc64 = getCRC64FromString(body); + + try { + await appendBlobClient.appendBlock(body, body.length, { + transactionalContentMD5: new Uint8Array(md5), + transactionalContentCrc64: new Uint8Array(crc64) + }); + } catch (e) { + assert.equal(e.name, "RestError"); + assert.equal(e.statusCode, 400); + assert.equal(e.code, "BothCrc64AndMd5HeaderPresent"); + return; + } + assert.fail("Did not throw an exception."); + }); + it("AppendBlock with ifTags should work @loki", async () => { await appendBlobClient.create(); diff --git a/tests/blob/apis/blob.test.ts b/tests/blob/apis/blob.test.ts index 6c4358bda..b184de0cd 100644 --- a/tests/blob/apis/blob.test.ts +++ b/tests/blob/apis/blob.test.ts @@ -4,9 +4,11 @@ import { newPipeline, BlobServiceClient, BlobItem, + BlobSASPermissions, Tags } from "@azure/storage-blob"; import * as assert from "assert"; +import * as crypto from "crypto"; import { BlobCopySourceTags, BlobHTTPHeaders } from "../../../src/blob/generated/artifacts/models"; import { configLogger } from "../../../src/common/Logger"; @@ -15,6 +17,7 @@ import { bodyToString, EMULATOR_ACCOUNT_KEY, EMULATOR_ACCOUNT_NAME, + getTestServerBaseURL, getUniqueName, sleep } from "../../testutils"; @@ -28,7 +31,7 @@ describe("BlobAPIs", () => { const factory = new BlobTestServerFactory(); 
const server = factory.createServer(); - const baseURL = `http://${server.config.host}:${server.config.port}/devstoreaccount1`; + const baseURL = getTestServerBaseURL(server); const serviceClient = new BlobServiceClient( baseURL, newPipeline( @@ -1661,6 +1664,57 @@ describe("BlobAPIs", () => { ); }); + it("Synchronized copy blob echoes source Content-MD5 in response when supplied @loki", async () => { + // Per the Copy Blob From URL REST contract, when the client supplies + // x-ms-source-content-md5 the service echoes it back as Content-MD5 on + // the response (so the client can correlate against the source's hash). + // Real Azure requires the source URL to carry auth — generate a read SAS + // (which the emulator also accepts). + const sourceBlob = getUniqueName("blob"); + const destBlob = getUniqueName("blob"); + + const sourceBlobClient = containerClient.getBlockBlobClient(sourceBlob); + const destBlobClient = containerClient.getBlockBlobClient(destBlob); + + const body = "hello"; + await sourceBlobClient.upload(body, body.length); + const sourceUrl = await sourceBlobClient.generateSasUrl({ + permissions: BlobSASPermissions.parse("r"), + expiresOn: new Date(Date.now() + 60 * 60 * 1000) + }); + + const md5 = crypto.createHash("md5").update(body, "utf8").digest(); + const result_copy = await destBlobClient.syncCopyFromURL(sourceUrl, { + sourceContentMD5: new Uint8Array(md5) + }); + + assert.equal(result_copy.copyStatus, "success"); + assert.deepStrictEqual( + Buffer.from(result_copy.contentMD5!), + Buffer.from(md5), + "Response Content-MD5 must echo the source-supplied value" + ); + }); + + it("Synchronized copy blob omits Content-MD5 in response when not supplied @loki", async () => { + // Without x-ms-source-content-md5, the response does not include Content-MD5. 
+ const sourceBlob = getUniqueName("blob"); + const destBlob = getUniqueName("blob"); + + const sourceBlobClient = containerClient.getBlockBlobClient(sourceBlob); + const destBlobClient = containerClient.getBlockBlobClient(destBlob); + + await sourceBlobClient.upload("hello", 5); + const sourceUrl = await sourceBlobClient.generateSasUrl({ + permissions: BlobSASPermissions.parse("r"), + expiresOn: new Date(Date.now() + 60 * 60 * 1000) + }); + + const result_copy = await destBlobClient.syncCopyFromURL(sourceUrl); + assert.equal(result_copy.copyStatus, "success"); + assert.strictEqual(result_copy.contentMD5, undefined); + }); + it("Synchronized copy blob should work to override metadata @loki", async () => { const sourceBlob = getUniqueName("blob"); const destBlob = getUniqueName("blob"); diff --git a/tests/blob/apis/blobbatch.test.ts b/tests/blob/apis/blobbatch.test.ts index 594dcb5a6..ddadf40e5 100644 --- a/tests/blob/apis/blobbatch.test.ts +++ b/tests/blob/apis/blobbatch.test.ts @@ -12,7 +12,7 @@ import { import assert from "assert"; import { configLogger } from "../../../src/common/Logger"; import BlobTestServerFactory from "../../BlobTestServerFactory"; -import { EMULATOR_ACCOUNT_KEY, EMULATOR_ACCOUNT_NAME, getUniqueName } from "../../testutils"; +import { EMULATOR_ACCOUNT_KEY, EMULATOR_ACCOUNT_NAME, getTestServerBaseURL, getUniqueName } from "../../testutils"; // Set true to enable debug log configLogger(false); @@ -21,7 +21,7 @@ describe("Blob batch API", () => { const factory = new BlobTestServerFactory(); const server = factory.createServer(); - const baseURL = `http://${server.config.host}:${server.config.port}/devstoreaccount1`; + const baseURL = getTestServerBaseURL(server); const serviceClient = new BlobServiceClient( baseURL, newPipeline( diff --git a/tests/blob/apis/blockblob.test.ts b/tests/blob/apis/blockblob.test.ts index 1376475fc..e5786d45a 100644 --- a/tests/blob/apis/blockblob.test.ts +++ b/tests/blob/apis/blockblob.test.ts @@ -15,6 +15,7 @@ 
import { bodyToString, EMULATOR_ACCOUNT_KEY, EMULATOR_ACCOUNT_NAME, + getTestServerBaseURL, getUniqueName, sleep } from "../../testutils"; @@ -30,7 +31,7 @@ describe("BlockBlobAPIs", () => { const factory = new BlobTestServerFactory(); const server = factory.createServer(); - const baseURL = `http://${server.config.host}:${server.config.port}/devstoreaccount1`; + const baseURL = getTestServerBaseURL(server); const serviceClient = new BlobServiceClient( baseURL, newPipeline( @@ -143,6 +144,98 @@ describe("BlockBlobAPIs", () => { assert.deepStrictEqual(await bodyToString(result, 0), ""); }); + // ---------------------------------------------------------------------- + // Transactional checksum tests (Put Blob + Stage Block) + // + // Every error code, response header, and behavior asserted in this block + // is verified against the real Azure Blob service: set + // AZURITE_LIVE_TEST_CONNECTION_STRING and these same tests run against the + // live account (see tests/testutils.ts header). The assertions are + // therefore statements about the Azure REST contract, not Azurite-internal + // conventions. Specifically pinned here against live: + // - CRC-64/NVME algorithm + little-endian wire format (was ECMA-182/BE) + // - Md5Mismatch and Crc64Mismatch error codes (was generic InvalidOperation) + // - BothCrc64AndMd5HeaderPresent (HTTP 400) when both headers supplied + // - x-ms-content-crc64 always echoed on Stage Block response (computed + // server-side even when the client didn't send one) + // ---------------------------------------------------------------------- + + it("upload (PutBlob) with correct crc64 should succeed @loki @sql", async () => { + // BlockBlobClient.upload's runtime DOES forward transactionalContentCrc64 + // (via setUploadChecksumParameters), but the public BlockBlobUploadOptions + // interface omits the field — purely a TypeScript surface gap. We reach the + // generated context directly to bypass the typed-surface omission. 
+ // SDK fix proposed in https://github.com/Azure/azure-sdk-for-js/pull/38490. + const body = "HelloWorld"; + const crc64 = getCRC64FromString(body); + const result = await (blockBlobClient as any).blockBlobContext.upload( + body.length, + body, + { transactionalContentCrc64: new Uint8Array(crc64) } + ); + assert.equal(result._response.status, 201); + + const downloaded = await blobClient.download(0); + assert.deepStrictEqual(await bodyToString(downloaded, body.length), body); + }); + + it("upload (PutBlob) with wrong crc64 should throw mismatch @loki @sql", async () => { + const body = "HelloWorld"; + const wrongCrc64 = getCRC64FromString("differentBody"); + try { + await (blockBlobClient as any).blockBlobContext.upload( + body.length, + body, + { transactionalContentCrc64: new Uint8Array(wrongCrc64) } + ); + } catch (e) { + assert.equal(e.statusCode, 400); + assert.equal(e.code, "Crc64Mismatch"); + return; + } + assert.fail("Did not throw an exception."); + }); + + it("upload (PutBlob) with wrong md5 should throw mismatch @loki @sql", async () => { + const body = "HelloWorld"; + const wrongMd5 = crypto.createHash("md5").update("differentBody", "utf8").digest(); + try { + await (blockBlobClient as any).blockBlobContext.upload( + body.length, + body, + { transactionalContentMD5: new Uint8Array(wrongMd5) } + ); + } catch (e) { + assert.equal(e.statusCode, 400); + assert.equal(e.code, "Md5Mismatch"); + return; + } + assert.fail("Did not throw an exception."); + }); + + it("upload (PutBlob) with both md5 and crc64 supplied should be rejected @loki @sql", async () => { + // Real Azure rejects requests that supply both Content-MD5 and + // x-ms-content-crc64 — Azurite must match. 
+ const body = "HelloWorld"; + const md5 = crypto.createHash("md5").update(body, "utf8").digest(); + const crc64 = getCRC64FromString(body); + try { + await (blockBlobClient as any).blockBlobContext.upload( + body.length, + body, + { + transactionalContentMD5: new Uint8Array(md5), + transactionalContentCrc64: new Uint8Array(crc64) + } + ); + } catch (e) { + assert.equal(e.statusCode, 400); + assert.equal(e.code, "BothCrc64AndMd5HeaderPresent"); + return; + } + assert.fail("Did not throw an exception."); + }); + it("upload with string body and all parameters set @loki @sql", async () => { const body: string = getUniqueName("randomstring"); const options = { @@ -277,8 +370,10 @@ describe("BlockBlobAPIs", () => { it("stageBlock with wrong body should throw md5 mismatch @loki @sql", async () => { const body = "HelloWorld"; - const md5 = new Uint8Array(Buffer.from("anotherBody")); - const options = { transactionalContentMD5: md5 }; + // A valid 16-byte MD5 of a *different* body, to exercise the mismatch + // path rather than the InvalidMd5 (wrong-length) path. 
+ const md5 = crypto.createHash("md5").update("anotherBody", "utf8").digest(); + const options = { transactionalContentMD5: new Uint8Array(md5) }; try { await blockBlobClient.stageBlock( @@ -290,10 +385,7 @@ describe("BlockBlobAPIs", () => { } catch (e) { assert.equal(e.name, "RestError"); assert.equal(e.statusCode, 400); - assert.equal( - e.details.message.indexOf("Provided contentMD5 doesn't match."), - 0 - ); + assert.equal(e.code, "Md5Mismatch"); return; } assert.fail("Did not throw an exception."); @@ -365,18 +457,43 @@ describe("BlockBlobAPIs", () => { } catch (e) { assert.equal(e.name, "RestError"); assert.equal(e.statusCode, 400); - assert.equal( - e.details.message.indexOf("Provided transactional CRC64 doesn't match."), - 0 + assert.equal(e.code, "Crc64Mismatch"); + return; + } + assert.fail("Did not throw an exception."); + }); + + it("stageBlock with both md5 and crc64 supplied should be rejected @loki @sql", async () => { + // Real Azure rejects requests that supply both Content-MD5 and + // x-ms-content-crc64 — Azurite must match. + const body = "HelloWorld"; + const md5 = crypto.createHash("md5").update(body, "utf8").digest(); + const crc64 = getCRC64FromString(body); + const options = { + transactionalContentMD5: new Uint8Array(md5), + transactionalContentCrc64: new Uint8Array(crc64) + }; + + try { + await blockBlobClient.stageBlock( + base64encode("1"), + body, + body.length, + options ); + } catch (e) { + assert.equal(e.name, "RestError"); + assert.equal(e.statusCode, 400); + assert.equal(e.code, "BothCrc64AndMd5HeaderPresent"); return; } assert.fail("Did not throw an exception."); }); - it("stageBlock without crc64 header should not include crc64 in response @loki @sql", async () => { - // When no x-ms-content-crc64 is sent the response must not include one, - // matching the behaviour of the real service. 
+ it("stageBlock without any checksum header should still echo computed crc64 @loki @sql", async () => { + // Per the Put Block REST contract, the service computes a CRC64 of the + // block and returns it in x-ms-content-crc64 even when the client didn't + // supply one. The echoed value must match the canonical CRC-64/NVME. const body = "HelloWorld"; const result = await blockBlobClient.stageBlock( base64encode("1"), @@ -384,7 +501,10 @@ describe("BlockBlobAPIs", () => { body.length ); assert.equal(result._response.status, 201); - assert.strictEqual(result.xMsContentCrc64, undefined); + assert.deepStrictEqual( + Buffer.from(result.xMsContentCrc64!), + Buffer.from(getCRC64FromString(body)) + ); }); it("commitBlockList @loki @sql", async () => { diff --git a/tests/blob/apis/container.test.ts b/tests/blob/apis/container.test.ts index 044fcad30..933e7db2c 100644 --- a/tests/blob/apis/container.test.ts +++ b/tests/blob/apis/container.test.ts @@ -18,6 +18,7 @@ import { base64encode, EMULATOR_ACCOUNT_KEY, EMULATOR_ACCOUNT_NAME, + getTestServerBaseURL, getUniqueName, sleep } from "../../testutils"; @@ -30,7 +31,7 @@ describe("ContainerAPIs", () => { const factory = new BlobTestServerFactory(); const server = factory.createServer(); - const baseURL = `http://${server.config.host}:${server.config.port}/devstoreaccount1`; + const baseURL = getTestServerBaseURL(server); const serviceClient = new BlobServiceClient( baseURL, newPipeline( diff --git a/tests/blob/apis/pageblob.test.ts b/tests/blob/apis/pageblob.test.ts index 66781d21b..a763c6524 100644 --- a/tests/blob/apis/pageblob.test.ts +++ b/tests/blob/apis/pageblob.test.ts @@ -13,6 +13,7 @@ import { bodyToString, EMULATOR_ACCOUNT_KEY, EMULATOR_ACCOUNT_NAME, + getTestServerBaseURL, getUniqueName } from "../../testutils"; import { getMD5FromString } from "../../../src/common/utils/utils"; @@ -24,7 +25,7 @@ describe("PageBlobAPIs", () => { const factory = new BlobTestServerFactory(); const server = factory.createServer(); - 
const baseURL = `http://${server.config.host}:${server.config.port}/devstoreaccount1`; + const baseURL = getTestServerBaseURL(server); const serviceClient = new BlobServiceClient( baseURL, newPipeline( diff --git a/tests/blob/blockblob.highlevel.test.ts b/tests/blob/blockblob.highlevel.test.ts index 9bdb29133..c439dc369 100644 --- a/tests/blob/blockblob.highlevel.test.ts +++ b/tests/blob/blockblob.highlevel.test.ts @@ -15,6 +15,7 @@ import { createRandomLocalFile, EMULATOR_ACCOUNT_KEY, EMULATOR_ACCOUNT_NAME, + getTestServerBaseURL, getUniqueName, readStreamToLocalFile, rmRecursive @@ -29,7 +30,7 @@ describe("BlockBlobHighlevel", () => { // Loose model to bypass if-match header used by download retry const server = factory.createServer(true); - const baseURL = `http://${server.config.host}:${server.config.port}/devstoreaccount1`; + const baseURL = getTestServerBaseURL(server); const serviceClient = new BlobServiceClient( baseURL, newPipeline( diff --git a/tests/blob/utils.test.ts b/tests/blob/utils.test.ts index 6826c6363..f2a22e1a9 100644 --- a/tests/blob/utils.test.ts +++ b/tests/blob/utils.test.ts @@ -64,12 +64,13 @@ describe("Utils", () => { }); describe("CRC64", () => { - // CRC-64/ECMA-182 check value for "123456789" per the CRC catalogue: - // https://reveng.sourceforge.io/crc-catalogue/all.htm - it("getCRC64FromString matches the standard CRC-64/ECMA-182 check value for '123456789'", () => { + // CRC-64/NVME check value for "123456789" per the CRC catalogue: + // https://reveng.sourceforge.io/crc-catalogue/all.htm — the numeric value is + // 0xae8b14860a799888, serialised on the wire as 8 little-endian bytes. 
+ it("getCRC64FromString matches the standard CRC-64/NVME check value for '123456789'", () => { const result = getCRC64FromString("123456789"); const hex = Buffer.from(result).toString("hex"); - assert.strictEqual(hex, "6c40df5f0b497347"); + assert.strictEqual(hex, "8898790a86148bae"); }); it("getCRC64FromString produces an 8-byte result", () => { diff --git a/tests/testutils.ts b/tests/testutils.ts index fc3966511..4a0079b9d 100644 --- a/tests/testutils.ts +++ b/tests/testutils.ts @@ -6,10 +6,101 @@ import { join } from "path"; import rimraf from "rimraf"; import { URL } from "url"; -export const EMULATOR_ACCOUNT_NAME = "devstoreaccount1"; +// ---- Live Azure mode ------------------------------------------------------- +// +// Why this exists: +// Azurite is meant to emulate the real Azure Blob service. Tests assert on +// error codes (Md5Mismatch, Crc64Mismatch, BothCrc64AndMd5HeaderPresent, +// InvalidMd5, InvalidHeaderValue, ...), checksum byte order, response-echo +// fields, etc. — and many of these expectations were originally taken from +// either documentation or "what makes sense", which is how Azurite ended up +// with subtle drifts from the real service (wrong CRC64 variant, big-endian +// bytes vs little-endian, generic InvalidOperation in place of typed +// Md5Mismatch, etc.). +// +// Routing the *same* test through real Azure is the only practical way to +// pin assertions to real-service behavior. When a test like +// `stageBlock with wrong body should throw md5 mismatch` passes both against +// the local Azurite server and against a real Azure account, the assertion +// is a verified statement about the service contract — not an Azurite-only +// convention. If a test only passes against Azurite, we know it's drift. +// +// How it works: +// Set AZURITE_LIVE_TEST_CONNECTION_STRING to a full storage account +// connection string. 
The harness then: +// - has `BlobTestServerFactory.createServer()` return a no-op stub +// (no local server starts/stops/cleans), +// - swaps `EMULATOR_ACCOUNT_NAME` / `EMULATOR_ACCOUNT_KEY` to the live +// account's credentials, +// - has `getTestServerBaseURL(server)` produce +// `https://.blob.core.windows.net` (no `/devstoreaccount1`). +// Tests that build their service client via these symbols therefore work +// against either backend without any per-test branching. +// +// Per-test files build their service-client base URL via `getTestServerBaseURL` +// (rather than the inline `http://host:port/devstoreaccount1` template), +// which routes correctly in both modes. + +function parseLiveConnectionString(cs: string): { + accountName: string; + accountKey: string; + blobEndpoint: string; +} { + const parts = new Map(); + for (const segment of cs.split(";")) { + const eq = segment.indexOf("="); + if (eq > 0) parts.set(segment.slice(0, eq).trim(), segment.slice(eq + 1).trim()); + } + const accountName = parts.get("AccountName"); + const accountKey = parts.get("AccountKey"); + const protocol = parts.get("DefaultEndpointsProtocol") || "https"; + const suffix = parts.get("EndpointSuffix") || "core.windows.net"; + if (!accountName || !accountKey) { + throw new Error( + "AZURITE_LIVE_TEST_CONNECTION_STRING is missing AccountName or AccountKey." + ); + } + const blobEndpoint = (parts.get("BlobEndpoint") || + `${protocol}://${accountName}.blob.${suffix}`).replace(/\/$/, ""); + return { accountName, accountKey, blobEndpoint }; +} + +const liveConnectionString = process.env.AZURITE_LIVE_TEST_CONNECTION_STRING || undefined; + +export const LIVE_TEST_MODE = liveConnectionString !== undefined; + +const liveConfig = liveConnectionString + ? parseLiveConnectionString(liveConnectionString) + : undefined; + +export const EMULATOR_ACCOUNT_NAME = + liveConfig?.accountName ?? "devstoreaccount1"; export const EMULATOR_ACCOUNT_KEY = + liveConfig?.accountKey ?? 
"Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw=="; +/** + * Builds the blob service base URL for a test fixture. In emulator mode this + * is `http://:/devstoreaccount1`; in live mode it's the real + * account's blob endpoint (e.g. `https://.blob.core.windows.net`). + * + * Pass `https: true` for the few tests that explicitly need HTTPS against the + * emulator (oauth/https tests); ignored in live mode where HTTPS is always used. + */ +export function getTestServerBaseURL( + server: { config: { host: string; port: number } }, + options: { https?: boolean; accountPathSuffix?: string } = {} +): string { + if (liveConfig) { + return options.accountPathSuffix + ? `${liveConfig.blobEndpoint}${options.accountPathSuffix}` + : liveConfig.blobEndpoint; + } + const protocol = options.https ? "https" : "http"; + const suffix = options.accountPathSuffix ?? "/devstoreaccount1"; + return `${protocol}://${server.config.host}:${server.config.port}${suffix}`; +} + // Counter-based suffix instead of Math.random() to guarantee uniqueness within // a test run. Random suffixes can collide when multiple entities are created // within the same millisecond on fast CI runners, causing flaky batch tests. 
From 91d999c2a6744930226695ac889414dc00698425 Mon Sep 17 00:00:00 2001 From: kristofr Date: Mon, 11 May 2026 16:32:27 +0200 Subject: [PATCH 05/11] fix comments and make sure page blob is also consistent with behavior --- src/blob/handlers/BlockBlobHandler.ts | 22 ++++++- src/blob/handlers/PageBlobHandler.ts | 28 ++++++++- src/blob/utils/utils.ts | 39 +++++++++---- src/common/utils/utils.ts | 4 +- tests/BlobTestServerFactory.ts | 2 +- tests/blob/apis/appendblob.test.ts | 2 +- tests/blob/apis/blob.test.ts | 7 ++- tests/blob/apis/blockblob.test.ts | 29 ++------- tests/blob/apis/pageblob.test.ts | 84 ++++++++++++++++++++++++++- tests/blob/utils.test.ts | 4 +- tests/testutils.ts | 34 ++--------- 11 files changed, 180 insertions(+), 75 deletions(-) diff --git a/src/blob/handlers/BlockBlobHandler.ts b/src/blob/handlers/BlockBlobHandler.ts index f5cdab2eb..7e1f7b8fa 100644 --- a/src/blob/handlers/BlockBlobHandler.ts +++ b/src/blob/handlers/BlockBlobHandler.ts @@ -13,7 +13,11 @@ import { parseXML } from "../generated/utils/xml"; import { BlobModel, BlockModel } from "../persistence/IBlobMetadataStore"; import { BLOB_API_VERSION } from "../utils/constants"; import BaseHandler from "./BaseHandler"; -import { computeAndValidateTransactionalChecksums, getTagsFromString } from "../utils/utils"; +import { + computeAndValidateTransactionalChecksums, + getTagsFromString, + isValidMd5Header +} from "../utils/utils"; /** * BlobHandler handles Azure Storage BlockBlob related requests. @@ -44,6 +48,22 @@ export default class BlockBlobHandler options.blobHTTPHeaders.blobContentType || context.request!.getHeader("content-type") || "application/octet-stream"; + + // x-ms-blob-content-md5 is a blob property header (stored as the blob's + // contentMD5 metadata). Real Azure rejects malformed values with + // InvalidHeaderValue (HTTP 400). Validate format here - the transactional + // helper validates Content-MD5 separately with InvalidMd5. 
+ const blobContentMD5Header = context.request!.getHeader("x-ms-blob-content-md5"); + if ( + typeof blobContentMD5Header === "string" && + !isValidMd5Header(blobContentMD5Header) + ) { + throw StorageErrorFactory.getInvalidHeaderValue(context.contextId!, { + HeaderName: "x-ms-blob-content-md5", + HeaderValue: blobContentMD5Header + }); + } + const contentMD5 = context.request!.getHeader("content-md5") || context.request!.getHeader("x-ms-blob-content-md5") ? options.blobHTTPHeaders.blobContentMD5 || diff --git a/src/blob/handlers/PageBlobHandler.ts b/src/blob/handlers/PageBlobHandler.ts index 429d860ef..0a68a80e3 100644 --- a/src/blob/handlers/PageBlobHandler.ts +++ b/src/blob/handlers/PageBlobHandler.ts @@ -12,8 +12,12 @@ import BlobWriteLeaseValidator from "../lease/BlobWriteLeaseValidator"; import IBlobMetadataStore, { BlobModel } from "../persistence/IBlobMetadataStore"; -import { BLOB_API_VERSION } from "../utils/constants"; -import { deserializePageBlobRangeHeader, getTagsFromString } from "../utils/utils"; +import { BLOB_API_VERSION, HeaderConstants } from "../utils/constants"; +import { + computeAndValidateTransactionalChecksums, + deserializePageBlobRangeHeader, + getTagsFromString +} from "../utils/utils"; import BaseHandler from "./BaseHandler"; import IPageBlobRangesManager from "./IPageBlobRangesManager"; @@ -236,6 +240,19 @@ export default class PageBlobHandler extends BaseHandler ); } + // Transactional integrity validation. Real Azure always returns a + // server-computed x-ms-content-crc64 on Put Page; force CRC64 always. 
+ const contentMD5 = blobCtx.request!.getHeader(HeaderConstants.CONTENT_MD5); + const contentCRC64 = options.transactionalContentCrc64; + const stream = await this.extentStore.readExtent(persistency, blobCtx.contextId); + const { crc64: calculatedCRC64 } = + await computeAndValidateTransactionalChecksums( + stream, + { md5: contentMD5, crc64: contentCRC64 }, + context.contextId, + { crc64: true } + ); + const res = await this.metadataStore.uploadPages( context, blob, @@ -251,7 +268,12 @@ export default class PageBlobHandler extends BaseHandler statusCode: 201, eTag: res.etag, lastModified: date, - contentMD5: undefined, // TODO + contentMD5: contentMD5 === undefined + ? undefined + : typeof contentMD5 === "string" + ? Buffer.from(contentMD5, "base64") + : contentMD5, + xMsContentCrc64: calculatedCRC64, blobSequenceNumber: res.blobSequenceNumber, requestId: blobCtx.contextId, version: BLOB_API_VERSION, diff --git a/src/blob/utils/utils.ts b/src/blob/utils/utils.ts index 9372c1608..748be124f 100644 --- a/src/blob/utils/utils.ts +++ b/src/blob/utils/utils.ts @@ -6,10 +6,25 @@ import { BlobTag, BlobTags } from "@azure/storage-blob"; import { TagContent } from "../persistence/QueryInterpreter/QueryNodes/IQueryNode"; import { computeTransactionalChecksums } from "../../common/utils/utils"; +/** + * Decodes an MD5 header value (base64 string or raw Uint8Array) and returns + * whether the result is exactly 16 bytes - the only shape real Azure accepts. + * Callers throw context-appropriate errors: + * - Content-MD5 / transactionalContentMD5 -> InvalidMd5 + * - x-ms-blob-content-md5 -> InvalidHeaderValue + */ +export function isValidMd5Header(value: Uint8Array | string): boolean { + const bytes = + typeof value === "string" + ? Buffer.from(value, "base64") + : Buffer.from(value); + return bytes.length === 16; +} + /** * Computes MD5 and/or CRC-64/NVME from a stream in a single pass and validates * against the request-supplied values. 
Throws Md5Mismatch / Crc64Mismatch - * (HTTP 400) on mismatch — the documented Azure Storage error codes for + * (HTTP 400) on mismatch - the documented Azure Storage error codes for * transactional integrity failures. * * Rejects requests that supply both checksums with `BothCrc64AndMd5HeaderPresent` @@ -17,7 +32,7 @@ import { computeTransactionalChecksums } from "../../common/utils/utils"; * * A checksum is computed when its `expected` value is provided, OR when the * corresponding `force` flag is set (for callers that need the value for - * non-validation purposes — e.g. Put Blob persists MD5 as a blob property). + * non-validation purposes - e.g. Put Blob persists MD5 as a blob property). */ export async function computeAndValidateTransactionalChecksums( stream: NodeJS.ReadableStream, @@ -28,14 +43,18 @@ export async function computeAndValidateTransactionalChecksums( if (expected.md5 !== undefined && expected.crc64 !== undefined) { throw StorageErrorFactory.getBothCrc64AndMd5HeaderPresent(contextId); } - if (expected.md5 !== undefined) { - const md5Bytes = - typeof expected.md5 === "string" - ? Buffer.from(expected.md5, "base64") - : Buffer.from(expected.md5); - if (md5Bytes.length !== 16) { - throw StorageErrorFactory.getInvalidMd5(contextId); - } + if (expected.md5 !== undefined && !isValidMd5Header(expected.md5)) { + throw StorageErrorFactory.getInvalidMd5(contextId); + } + if (expected.crc64 !== undefined && Buffer.from(expected.crc64).length < 8) { + // CRC-64/NVME is a 64-bit value; the wire format is base64-encoded bytes. + // Verified against real Azure: <8 bytes is rejected as InvalidHeaderValue; + // >=8 bytes is accepted at header-validation and falls through to a value + // comparison (which then surfaces as Crc64Mismatch if it doesn't match). 
+ throw StorageErrorFactory.getInvalidHeaderValue(contextId, { + HeaderName: "x-ms-content-crc64", + HeaderValue: Buffer.from(expected.crc64).toString("base64") + }); } const calculated = await computeTransactionalChecksums(stream, expected, force); diff --git a/src/common/utils/utils.ts b/src/common/utils/utils.ts index c0c6fe5b3..be3ec5cf0 100644 --- a/src/common/utils/utils.ts +++ b/src/common/utils/utils.ts @@ -183,7 +183,7 @@ export async function getMD5FromStream( // xorout = 0xffffffffffffffff // check = 0xae8b14860a799888 ("123456789") // -// Represented as two 32-bit halves (hi, lo) so we don't need BigInt — Azurite +// Represented as two 32-bit halves (hi, lo) so we don't need BigInt - Azurite // supports Node engines down to 10.0.0 where BigInt isn't reliable. Since this // is a reflected (right-shift) CRC, `lo` holds the bits that get consumed by // the next input byte. @@ -270,7 +270,7 @@ export async function getCRC64FromStream( * * `expected` is the caller's request-supplied value (only its presence matters * here; comparison happens at the caller). `force` is for callers that need a - * checksum for purposes other than validation — e.g. Put Blob always needs MD5 + * checksum for purposes other than validation - e.g. Put Blob always needs MD5 * because it's persisted as the blob's contentMD5 property. */ export async function computeTransactionalChecksums( diff --git a/tests/BlobTestServerFactory.ts b/tests/BlobTestServerFactory.ts index 79f82da2a..2360f5b04 100644 --- a/tests/BlobTestServerFactory.ts +++ b/tests/BlobTestServerFactory.ts @@ -9,7 +9,7 @@ import { LIVE_TEST_MODE } from "./testutils"; /** * No-op stand-in returned in live mode. Tests call start/close/clean on the - * "server", but in live mode there's no local server to manage — we just need + * "server", but in live mode there's no local server to manage - we just need * an object with a `config` whose host/port the test fixture can read. 
*/ class LiveModeStubServer { diff --git a/tests/blob/apis/appendblob.test.ts b/tests/blob/apis/appendblob.test.ts index 56b8aac8c..dbe72e78d 100644 --- a/tests/blob/apis/appendblob.test.ts +++ b/tests/blob/apis/appendblob.test.ts @@ -530,7 +530,7 @@ describe("AppendBlobAPIs", () => { it("AppendBlock with both md5 and crc64 supplied should be rejected @loki", async () => { // Real Azure rejects requests that supply both Content-MD5 and - // x-ms-content-crc64 — Azurite must match. + // x-ms-content-crc64 - Azurite must match. await appendBlobClient.create(); const body = "HelloWorld"; const md5 = crypto.createHash("md5").update(body, "utf8").digest(); diff --git a/tests/blob/apis/blob.test.ts b/tests/blob/apis/blob.test.ts index b184de0cd..06f47d98a 100644 --- a/tests/blob/apis/blob.test.ts +++ b/tests/blob/apis/blob.test.ts @@ -1668,7 +1668,7 @@ describe("BlobAPIs", () => { // Per the Copy Blob From URL REST contract, when the client supplies // x-ms-source-content-md5 the service echoes it back as Content-MD5 on // the response (so the client can correlate against the source's hash). - // Real Azure requires the source URL to carry auth — generate a read SAS + // Real Azure requires the source URL to carry auth - generate a read SAS // (which the emulator also accepts). const sourceBlob = getUniqueName("blob"); const destBlob = getUniqueName("blob"); @@ -2582,6 +2582,8 @@ describe("BlobAPIs", () => { }); it("upload invalid x-ms-blob-content-md5 @loki @sql", async () => { + // Real Azure rejects a malformed x-ms-blob-content-md5 (not 16 bytes after + // base64 decode) with InvalidHeaderValue. 
const pipeline = newPipeline( new StorageSharedKeyCredential( EMULATOR_ACCOUNT_NAME, @@ -2605,8 +2607,7 @@ describe("BlobAPIs", () => { assert.fail("Expected MD5 error"); } catch (err) { assert.deepStrictEqual((err as any).statusCode, 400); - assert.deepStrictEqual((err as any).code, 'InvalidOperation'); - assert.deepStrictEqual((err as any).details.errorCode, 'InvalidOperation'); + assert.deepStrictEqual((err as any).code, "InvalidHeaderValue"); } }); diff --git a/tests/blob/apis/blockblob.test.ts b/tests/blob/apis/blockblob.test.ts index e5786d45a..c06654be5 100644 --- a/tests/blob/apis/blockblob.test.ts +++ b/tests/blob/apis/blockblob.test.ts @@ -144,28 +144,11 @@ describe("BlockBlobAPIs", () => { assert.deepStrictEqual(await bodyToString(result, 0), ""); }); - // ---------------------------------------------------------------------- - // Transactional checksum tests (Put Blob + Stage Block) - // - // Every error code, response header, and behavior asserted in this block - // is verified against the real Azure Blob service: set - // AZURITE_LIVE_TEST_CONNECTION_STRING and these same tests run against the - // live account (see tests/testutils.ts header). The assertions are - // therefore statements about the Azure REST contract, not Azurite-internal - // conventions. 
Specifically pinned here against live: - // - CRC-64/NVME algorithm + little-endian wire format (was ECMA-182/BE) - // - Md5Mismatch and Crc64Mismatch error codes (was generic InvalidOperation) - // - BothCrc64AndMd5HeaderPresent (HTTP 400) when both headers supplied - // - x-ms-content-crc64 always echoed on Stage Block response (computed - // server-side even when the client didn't send one) - // ---------------------------------------------------------------------- - it("upload (PutBlob) with correct crc64 should succeed @loki @sql", async () => { - // BlockBlobClient.upload's runtime DOES forward transactionalContentCrc64 + // BlockBlobClient.upload's runtime forwards transactionalContentCrc64 // (via setUploadChecksumParameters), but the public BlockBlobUploadOptions - // interface omits the field — purely a TypeScript surface gap. We reach the - // generated context directly to bypass the typed-surface omission. - // SDK fix proposed in https://github.com/Azure/azure-sdk-for-js/pull/38490. + // interface omits the field - a TypeScript surface gap. Reach the + // generated context directly to bypass it. const body = "HelloWorld"; const crc64 = getCRC64FromString(body); const result = await (blockBlobClient as any).blockBlobContext.upload( @@ -215,7 +198,7 @@ describe("BlockBlobAPIs", () => { it("upload (PutBlob) with both md5 and crc64 supplied should be rejected @loki @sql", async () => { // Real Azure rejects requests that supply both Content-MD5 and - // x-ms-content-crc64 — Azurite must match. + // x-ms-content-crc64 - Azurite must match. 
const body = "HelloWorld"; const md5 = crypto.createHash("md5").update(body, "utf8").digest(); const crc64 = getCRC64FromString(body); @@ -443,7 +426,7 @@ describe("BlockBlobAPIs", () => { it("stageBlock with wrong body should throw crc64 mismatch @loki @sql", async () => { const body = "HelloWorld"; - // Provide CRC64 of a different payload — server must reject the upload + // Provide CRC64 of a different payload - server must reject the upload const wrongCrc64 = getCRC64FromString("differentBody"); const options = { transactionalContentCrc64: new Uint8Array(wrongCrc64) }; @@ -465,7 +448,7 @@ describe("BlockBlobAPIs", () => { it("stageBlock with both md5 and crc64 supplied should be rejected @loki @sql", async () => { // Real Azure rejects requests that supply both Content-MD5 and - // x-ms-content-crc64 — Azurite must match. + // x-ms-content-crc64 - Azurite must match. const body = "HelloWorld"; const md5 = crypto.createHash("md5").update(body, "utf8").digest(); const crc64 = getCRC64FromString(body); diff --git a/tests/blob/apis/pageblob.test.ts b/tests/blob/apis/pageblob.test.ts index a763c6524..f1ebf0d4f 100644 --- a/tests/blob/apis/pageblob.test.ts +++ b/tests/blob/apis/pageblob.test.ts @@ -16,7 +16,8 @@ import { getTestServerBaseURL, getUniqueName } from "../../testutils"; -import { getMD5FromString } from "../../../src/common/utils/utils"; +import { getCRC64FromString, getMD5FromString } from "../../../src/common/utils/utils"; +import * as crypto from "crypto"; // Set true to enable debug log configLogger(false); @@ -533,6 +534,87 @@ describe("PageBlobAPIs", () => { assert.fail(); }); + it("uploadPages with correct crc64 should succeed and echo crc64 @loki", async () => { + const length = 512; + await pageBlobClient.create(length); + const body = "a".repeat(length); + const crc64 = getCRC64FromString(body); + + const result = await pageBlobClient.uploadPages(body, 0, length, { + transactionalContentCrc64: new Uint8Array(crc64) + }); + 
assert.equal(result._response.status, 201); + assert.deepStrictEqual( + Buffer.from(result.xMsContentCrc64!), + Buffer.from(crc64) + ); + }); + + it("uploadPages with wrong crc64 should throw mismatch @loki", async () => { + const length = 512; + await pageBlobClient.create(length); + const body = "a".repeat(length); + const wrongCrc64 = getCRC64FromString("b".repeat(length)); + try { + await pageBlobClient.uploadPages(body, 0, length, { + transactionalContentCrc64: new Uint8Array(wrongCrc64) + }); + } catch (e) { + assert.equal(e.statusCode, 400); + assert.equal(e.code, "Crc64Mismatch"); + return; + } + assert.fail("Did not throw an exception."); + }); + + it("uploadPages with wrong md5 should throw mismatch @loki", async () => { + const length = 512; + await pageBlobClient.create(length); + const body = "a".repeat(length); + const wrongMd5 = crypto.createHash("md5").update("differentBody", "utf8").digest(); + try { + await pageBlobClient.uploadPages(body, 0, length, { + transactionalContentMD5: new Uint8Array(wrongMd5) + }); + } catch (e) { + assert.equal(e.statusCode, 400); + assert.equal(e.code, "Md5Mismatch"); + return; + } + assert.fail("Did not throw an exception."); + }); + + it("uploadPages with both md5 and crc64 supplied should be rejected @loki", async () => { + const length = 512; + await pageBlobClient.create(length); + const body = "a".repeat(length); + const md5 = crypto.createHash("md5").update(body, "utf8").digest(); + const crc64 = getCRC64FromString(body); + try { + await pageBlobClient.uploadPages(body, 0, length, { + transactionalContentMD5: new Uint8Array(md5), + transactionalContentCrc64: new Uint8Array(crc64) + }); + } catch (e) { + assert.equal(e.statusCode, 400); + assert.equal(e.code, "BothCrc64AndMd5HeaderPresent"); + return; + } + assert.fail("Did not throw an exception."); + }); + + it("uploadPages without any checksum header should still echo computed crc64 @loki", async () => { + const length = 512; + await 
pageBlobClient.create(length); + const body = "a".repeat(length); + const result = await pageBlobClient.uploadPages(body, 0, length); + assert.equal(result._response.status, 201); + assert.deepStrictEqual( + Buffer.from(result.xMsContentCrc64!), + Buffer.from(getCRC64FromString(body)) + ); + }); + it("uploadPages with sequential pages @loki", async () => { const length = 512 * 3; await pageBlobClient.create(length); diff --git a/tests/blob/utils.test.ts b/tests/blob/utils.test.ts index f2a22e1a9..75cfa12d9 100644 --- a/tests/blob/utils.test.ts +++ b/tests/blob/utils.test.ts @@ -65,7 +65,7 @@ describe("Utils", () => { describe("CRC64", () => { // CRC-64/NVME check value for "123456789" per the CRC catalogue: - // https://reveng.sourceforge.io/crc-catalogue/all.htm — the numeric value is + // https://reveng.sourceforge.io/crc-catalogue/all.htm - the numeric value is // 0xae8b14860a799888, serialised on the wire as 8 little-endian bytes. it("getCRC64FromString matches the standard CRC-64/NVME check value for '123456789'", () => { const result = getCRC64FromString("123456789"); @@ -91,7 +91,7 @@ describe("CRC64", () => { it("getCRC64FromStream produces identical results regardless of chunk boundaries", async () => { // Streaming data split across different chunk sizes must produce the same - // CRC as a single contiguous buffer — chunk boundaries must not affect the result. + // CRC as a single contiguous buffer - chunk boundaries must not affect the result. const data = Buffer.from("Azure Blob Storage block integrity check"); const expected = getCRC64FromString(data.toString()); diff --git a/tests/testutils.ts b/tests/testutils.ts index 4a0079b9d..20a14f748 100644 --- a/tests/testutils.ts +++ b/tests/testutils.ts @@ -8,34 +8,12 @@ import { URL } from "url"; // ---- Live Azure mode ------------------------------------------------------- // -// Why this exists: -// Azurite is meant to emulate the real Azure Blob service. 
Tests assert on -// error codes (Md5Mismatch, Crc64Mismatch, BothCrc64AndMd5HeaderPresent, -// InvalidMd5, InvalidHeaderValue, ...), checksum byte order, response-echo -// fields, etc. — and many of these expectations were originally taken from -// either documentation or "what makes sense", which is how Azurite ended up -// with subtle drifts from the real service (wrong CRC64 variant, big-endian -// bytes vs little-endian, generic InvalidOperation in place of typed -// Md5Mismatch, etc.). -// -// Routing the *same* test through real Azure is the only practical way to -// pin assertions to real-service behavior. When a test like -// `stageBlock with wrong body should throw md5 mismatch` passes both against -// the local Azurite server and against a real Azure account, the assertion -// is a verified statement about the service contract — not an Azurite-only -// convention. If a test only passes against Azurite, we know it's drift. -// -// How it works: -// Set AZURITE_LIVE_TEST_CONNECTION_STRING to a full storage account -// connection string. The harness then: -// - has `BlobTestServerFactory.createServer()` return a no-op stub -// (no local server starts/stops/cleans), -// - swaps `EMULATOR_ACCOUNT_NAME` / `EMULATOR_ACCOUNT_KEY` to the live -// account's credentials, -// - has `getTestServerBaseURL(server)` produce -// `https://<account>.blob.core.windows.net` (no `/devstoreaccount1`). -// Tests that build their service client via these symbols therefore work -// against either backend without any per-test branching. +// Set AZURITE_LIVE_TEST_CONNECTION_STRING to a full storage account connection +// string to route tests at a real Azure account instead of a local Azurite +// server. When set: +// - BlobTestServerFactory.createServer() returns a no-op stub. +// - EMULATOR_ACCOUNT_NAME / EMULATOR_ACCOUNT_KEY resolve to the live account. +// - getTestServerBaseURL(server) returns the live blob endpoint.
// // Per-test files build their service-client base URL via `getTestServerBaseURL` // (rather than the inline `http://host:port/devstoreaccount1` template), From fc2d2e78bc4cbfa9ffbf71242b11f1f040a291c8 Mon Sep 17 00:00:00 2001 From: kristofr Date: Mon, 11 May 2026 16:40:43 +0200 Subject: [PATCH 06/11] update stageblob behavior to match azure --- src/blob/handlers/BlockBlobHandler.ts | 14 +++++++------- tests/blob/apis/blockblob.test.ts | 21 +++++++++++++++++++++ 2 files changed, 28 insertions(+), 7 deletions(-) diff --git a/src/blob/handlers/BlockBlobHandler.ts b/src/blob/handlers/BlockBlobHandler.ts index 7e1f7b8fa..2c2824e73 100644 --- a/src/blob/handlers/BlockBlobHandler.ts +++ b/src/blob/handlers/BlockBlobHandler.ts @@ -186,14 +186,14 @@ export default class BlockBlobHandler const blobName = blobCtx.blob!; const date = blobCtx.startTime!; - // stageBlock operation doesn't have blobHTTPHeaders + // stageBlock operation doesn't accept blob property headers per the + // Put Block REST contract: only Content-MD5 and x-ms-content-crc64 are + // honored. Verified live: real Azure silently ignores x-ms-blob-content-md5 + // here (even malformed values), so don't use it as a fallback source. // https://learn.microsoft.com/en-us/rest/api/storageservices/put-block - // options.blobHTTPHeaders = options.blobHTTPHeaders || {}; - const contentMD5 = context.request!.getHeader("content-md5") - || context.request!.getHeader("x-ms-blob-content-md5") - ? 
options.transactionalContentMD5 || - context.request!.getHeader("content-md5") - : undefined; + const contentMD5 = + options.transactionalContentMD5 || + context.request!.getHeader("content-md5"); const contentCRC64 = options.transactionalContentCrc64; this.validateBlockId(blockId, blobCtx); diff --git a/tests/blob/apis/blockblob.test.ts b/tests/blob/apis/blockblob.test.ts index c06654be5..35de61583 100644 --- a/tests/blob/apis/blockblob.test.ts +++ b/tests/blob/apis/blockblob.test.ts @@ -5,6 +5,7 @@ import { BlobSASPermissions, Tags } from "@azure/storage-blob"; +import CustomHeaderPolicyFactory from "../RequestPolicy/CustomHeaderPolicyFactory"; import * as assert from "assert"; import * as crypto from "crypto"; @@ -473,6 +474,26 @@ describe("BlockBlobAPIs", () => { assert.fail("Did not throw an exception."); }); + it("stageBlock ignores x-ms-blob-content-md5 (not a Put Block REST header) @loki @sql", async () => { + // Per the Put Block REST contract, x-ms-blob-content-md5 is NOT a Put Block + // header. Real Azure silently ignores it (even when malformed). Azurite + // must match: a bogus x-ms-blob-content-md5 must not cause validation or + // an error. 
+ const pipeline = newPipeline( + new StorageSharedKeyCredential(EMULATOR_ACCOUNT_NAME, EMULATOR_ACCOUNT_KEY), + { retryOptions: { maxTries: 1 }, keepAliveOptions: { enable: false } } + ); + pipeline.factories.unshift( + new CustomHeaderPolicyFactory("x-ms-blob-content-md5", "AAAAAAAAAAA=") + ); + const altClient = new BlobServiceClient(baseURL, pipeline) + .getContainerClient(containerName) + .getBlockBlobClient(blobName); + + const result = await altClient.stageBlock(base64encode("1"), "HelloWorld", 10); + assert.equal(result._response.status, 201); + }); + it("stageBlock without any checksum header should still echo computed crc64 @loki @sql", async () => { // Per the Put Block REST contract, the service computes a CRC64 of the // block and returns it in x-ms-content-crc64 even when the client didn't From d773200320bcb0586a5e7d5626dbc6ffd24bc5c2 Mon Sep 17 00:00:00 2001 From: kristofr Date: Mon, 11 May 2026 17:10:59 +0200 Subject: [PATCH 07/11] clarify md5 contract according to real Azure --- src/blob/handlers/BlockBlobHandler.ts | 15 +++++++++----- tests/blob/apis/blockblob.test.ts | 28 +++++++++++++++++++++++++++ 2 files changed, 38 insertions(+), 5 deletions(-) diff --git a/src/blob/handlers/BlockBlobHandler.ts b/src/blob/handlers/BlockBlobHandler.ts index 2c2824e73..c2afbd148 100644 --- a/src/blob/handlers/BlockBlobHandler.ts +++ b/src/blob/handlers/BlockBlobHandler.ts @@ -64,11 +64,16 @@ export default class BlockBlobHandler }); } - const contentMD5 = context.request!.getHeader("content-md5") - || context.request!.getHeader("x-ms-blob-content-md5") - ? options.blobHTTPHeaders.blobContentMD5 || - context.request!.getHeader("content-md5") - : undefined; + // Per the Put Blob REST contract, x-ms-blob-content-md5 takes precedence + // over Content-MD5 for transit integrity verification on BlockBlob. + // Verified live. 
Prefer the SDK-parsed blobContentMD5 option; fall back + // to the raw x-ms-blob-content-md5 header (for clients that inject it + // directly without going through the SDK option); finally fall back to + // Content-MD5. + const contentMD5 = + options.blobHTTPHeaders.blobContentMD5 + ?? blobContentMD5Header + ?? context.request!.getHeader("content-md5"); const contentCRC64 = options.transactionalContentCrc64; await this.metadataStore.checkContainerExist( diff --git a/tests/blob/apis/blockblob.test.ts b/tests/blob/apis/blockblob.test.ts index 35de61583..2538ff4ae 100644 --- a/tests/blob/apis/blockblob.test.ts +++ b/tests/blob/apis/blockblob.test.ts @@ -197,6 +197,34 @@ describe("BlockBlobAPIs", () => { assert.fail("Did not throw an exception."); }); + it("upload (PutBlob) x-ms-blob-content-md5 takes precedence over Content-MD5 @loki @sql", async () => { + // Per the Put Blob REST contract, x-ms-blob-content-md5 takes precedence + // over Content-MD5 for transit integrity verification on BlockBlob. + // - Content-MD5 wrong + x-ms-blob-content-md5 correct -> success + // - Content-MD5 correct + x-ms-blob-content-md5 wrong -> Md5Mismatch + const body = "HelloWorld"; + const correctMd5 = crypto.createHash("md5").update(body, "utf8").digest(); + const wrongMd5 = crypto.createHash("md5").update("differentBody", "utf8").digest(); + + // Wrong transactional + correct blob-content-md5 -> success. + await (blockBlobClient as any).blockBlobContext.upload(body.length, body, { + transactionalContentMD5: new Uint8Array(wrongMd5), + blobHttpHeaders: { blobContentMD5: new Uint8Array(correctMd5) } + }); + + // Correct transactional + wrong blob-content-md5 -> Md5Mismatch. 
+ try { + await (blockBlobClient as any).blockBlobContext.upload(body.length, body, { + transactionalContentMD5: new Uint8Array(correctMd5), + blobHttpHeaders: { blobContentMD5: new Uint8Array(wrongMd5) } + }); + assert.fail("Expected Md5Mismatch when x-ms-blob-content-md5 is wrong."); + } catch (e) { + assert.equal(e.statusCode, 400); + assert.equal(e.code, "Md5Mismatch"); + } + }); + it("upload (PutBlob) with both md5 and crc64 supplied should be rejected @loki @sql", async () => { // Real Azure rejects requests that supply both Content-MD5 and // x-ms-content-crc64 - Azurite must match. From a2b0e035df89d7c6685d20ef0938d80bccd15992 Mon Sep 17 00:00:00 2001 From: kristofr Date: Wed, 20 May 2026 09:31:22 +0200 Subject: [PATCH 08/11] add test pinning InvalidMd5 for wrong-length Content-MD5 Verified against real Azure: a 4-byte Content-MD5 header is rejected with InvalidMd5 (not Md5Mismatch). The new test guards stageBlock against silently regressing this code, complementing the existing wrong-value test that asserts Md5Mismatch. Co-Authored-By: Claude Opus 4.7 (1M context) --- tests/blob/apis/blockblob.test.ts | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/tests/blob/apis/blockblob.test.ts b/tests/blob/apis/blockblob.test.ts index 2538ff4ae..e1127b42e 100644 --- a/tests/blob/apis/blockblob.test.ts +++ b/tests/blob/apis/blockblob.test.ts @@ -403,6 +403,30 @@ describe("BlockBlobAPIs", () => { assert.fail("Did not throw an exception."); }); + it("stageBlock with wrong-length MD5 should be rejected @loki @sql", async () => { + // Content-MD5 must decode to exactly 16 bytes. This test pins which error + // code the service returns for a malformed (4-byte) MD5 header so Azurite + // can be verified against real Azure. 
+ const body = "HelloWorld"; + const wrongLengthMd5 = new Uint8Array([0, 0, 0, 0]); + const options = { transactionalContentMD5: wrongLengthMd5 }; + + try { + await blockBlobClient.stageBlock( + base64encode("1"), + body, + body.length, + options + ); + } catch (e) { + assert.equal(e.name, "RestError"); + assert.equal(e.statusCode, 400); + assert.equal(e.code, "InvalidMd5"); + return; + } + assert.fail("Did not throw an exception."); + }); + it("stageBlock with md5 hash check @loki @sql", async () => { const body = "HelloWorld"; const md5 = crypto.createHash("md5").update(body, "utf8").digest(); From 1eb77cc34118807b9c06172c15c074244bca3f9a Mon Sep 17 00:00:00 2001 From: kristofr Date: Thu, 21 May 2026 12:12:44 +0200 Subject: [PATCH 09/11] unify MD5 validation: x-ms-blob-content-md5 also returns InvalidMd5 Live probe showed real Azure returns InvalidMd5 (not InvalidHeaderValue) for wrong-length x-ms-blob-content-md5 on Put Blob. Drop Azurite's explicit InvalidHeaderValue path and let the unified transactional validator handle all three MD5 sources (Content-MD5, transactionalContentMD5, x-ms-blob-content-md5) the same way. Also adds a CRC64 wrong-length test (pins the existing InvalidHeaderValue path) and a Put Blob wrong-length-blob-content-md5 test (pins the new InvalidMd5 path). Both verified live. Clarifies tests/testutils.ts JSDoc to acknowledge http:// connection strings instead of claiming HTTPS is always used in live mode. 
Co-Authored-By: Claude Opus 4.7 (1M context) --- src/blob/handlers/BlockBlobHandler.ts | 23 +++----------- src/blob/utils/utils.ts | 5 ++-- tests/blob/apis/blockblob.test.ts | 43 +++++++++++++++++++++++++++ tests/testutils.ts | 4 ++- 4 files changed, 52 insertions(+), 23 deletions(-) diff --git a/src/blob/handlers/BlockBlobHandler.ts b/src/blob/handlers/BlockBlobHandler.ts index c2afbd148..08f54e708 100644 --- a/src/blob/handlers/BlockBlobHandler.ts +++ b/src/blob/handlers/BlockBlobHandler.ts @@ -15,8 +15,7 @@ import { BLOB_API_VERSION } from "../utils/constants"; import BaseHandler from "./BaseHandler"; import { computeAndValidateTransactionalChecksums, - getTagsFromString, - isValidMd5Header + getTagsFromString } from "../utils/utils"; /** @@ -49,30 +48,16 @@ export default class BlockBlobHandler context.request!.getHeader("content-type") || "application/octet-stream"; - // x-ms-blob-content-md5 is a blob property header (stored as the blob's - // contentMD5 metadata). Real Azure rejects malformed values with - // InvalidHeaderValue (HTTP 400). Validate format here - the transactional - // helper validates Content-MD5 separately with InvalidMd5. - const blobContentMD5Header = context.request!.getHeader("x-ms-blob-content-md5"); - if ( - typeof blobContentMD5Header === "string" && - !isValidMd5Header(blobContentMD5Header) - ) { - throw StorageErrorFactory.getInvalidHeaderValue(context.contextId!, { - HeaderName: "x-ms-blob-content-md5", - HeaderValue: blobContentMD5Header - }); - } - // Per the Put Blob REST contract, x-ms-blob-content-md5 takes precedence // over Content-MD5 for transit integrity verification on BlockBlob. // Verified live. Prefer the SDK-parsed blobContentMD5 option; fall back // to the raw x-ms-blob-content-md5 header (for clients that inject it // directly without going through the SDK option); finally fall back to - // Content-MD5. + // Content-MD5. 
Malformed values are rejected as InvalidMd5 by the + // unified validator below (matches real Azure for all three sources). const contentMD5 = options.blobHTTPHeaders.blobContentMD5 - ?? blobContentMD5Header + ?? context.request!.getHeader("x-ms-blob-content-md5") ?? context.request!.getHeader("content-md5"); const contentCRC64 = options.transactionalContentCrc64; diff --git a/src/blob/utils/utils.ts b/src/blob/utils/utils.ts index 748be124f..8be2b5253 100644 --- a/src/blob/utils/utils.ts +++ b/src/blob/utils/utils.ts @@ -9,9 +9,8 @@ import { computeTransactionalChecksums } from "../../common/utils/utils"; /** * Decodes an MD5 header value (base64 string or raw Uint8Array) and returns * whether the result is exactly 16 bytes - the only shape real Azure accepts. - * Callers throw context-appropriate errors: - * - Content-MD5 / transactionalContentMD5 -> InvalidMd5 - * - x-ms-blob-content-md5 -> InvalidHeaderValue + * Wrong-length values on Content-MD5, transactionalContentMD5, or + * x-ms-blob-content-md5 are all rejected with InvalidMd5 (verified live). */ export function isValidMd5Header(value: Uint8Array | string): boolean { const bytes = diff --git a/tests/blob/apis/blockblob.test.ts b/tests/blob/apis/blockblob.test.ts index e1127b42e..13a64009b 100644 --- a/tests/blob/apis/blockblob.test.ts +++ b/tests/blob/apis/blockblob.test.ts @@ -225,6 +225,25 @@ describe("BlockBlobAPIs", () => { } }); + it("upload (PutBlob) with wrong-length x-ms-blob-content-md5 should be rejected @loki @sql", async () => { + // x-ms-blob-content-md5 must decode to exactly 16 bytes. Verified live: + // real Azure rejects wrong-length values with InvalidMd5 (not + // InvalidHeaderValue, despite x-ms-blob-content-md5 being a property + // header). Azurite routes all MD5 sources through the same validator. 
+ const body = "HelloWorld"; + const wrongLength = new Uint8Array([0, 0, 0, 0]); + try { + await (blockBlobClient as any).blockBlobContext.upload(body.length, body, { + blobHttpHeaders: { blobContentMD5: wrongLength } + }); + } catch (e: any) { + assert.equal(e.statusCode, 400); + assert.equal(e.code, "InvalidMd5"); + return; + } + assert.fail("Did not throw an exception."); + }); + it("upload (PutBlob) with both md5 and crc64 supplied should be rejected @loki @sql", async () => { // Real Azure rejects requests that supply both Content-MD5 and // x-ms-content-crc64 - Azurite must match. @@ -477,6 +496,30 @@ describe("BlockBlobAPIs", () => { assert.equal(listResponse.uncommittedBlocks![0].size, body.length); }); + it("stageBlock with wrong-length CRC64 should be rejected @loki @sql", async () => { + // x-ms-content-crc64 must decode to at least 8 bytes (CRC-64 is 64-bit). + // Real Azure rejects shorter values with InvalidHeaderValue; this test + // pins that contract for Azurite. + const body = "HelloWorld"; + const wrongLengthCrc64 = new Uint8Array([0, 0, 0, 0]); + const options = { transactionalContentCrc64: wrongLengthCrc64 }; + + try { + await blockBlobClient.stageBlock( + base64encode("1"), + body, + body.length, + options + ); + } catch (e) { + assert.equal(e.name, "RestError"); + assert.equal(e.statusCode, 400); + assert.equal(e.code, "InvalidHeaderValue"); + return; + } + assert.fail("Did not throw an exception."); + }); + it("stageBlock with wrong body should throw crc64 mismatch @loki @sql", async () => { const body = "HelloWorld"; // Provide CRC64 of a different payload - server must reject the upload diff --git a/tests/testutils.ts b/tests/testutils.ts index 20a14f748..0e985c154 100644 --- a/tests/testutils.ts +++ b/tests/testutils.ts @@ -63,7 +63,9 @@ export const EMULATOR_ACCOUNT_KEY = * account's blob endpoint (e.g. `https://.blob.core.windows.net`). 
* * Pass `https: true` for the few tests that explicitly need HTTPS against the - * emulator (oauth/https tests); ignored in live mode where HTTPS is always used. + * emulator (oauth/https tests); ignored in live mode, where the protocol is + * dictated by the connection string (`DefaultEndpointsProtocol` / `BlobEndpoint`) + * and is HTTPS for typical Azure accounts. */ export function getTestServerBaseURL( server: { config: { host: string; port: number } }, From 8be1cf633b410a49fb5aa31daaedf439d636dd88 Mon Sep 17 00:00:00 2001 From: kristofr Date: Mon, 25 May 2026 10:27:05 +0200 Subject: [PATCH 10/11] made md5 and crc validation logic much more robust and tested extensively against live azure to ensure identical behavior --- src/blob/utils/utils.ts | 34 +++++--- src/common/utils/utils.ts | 2 +- tests/blob/apis/blob.test.ts | 4 +- tests/blob/handlers/AppendBlobHandler.test.ts | 11 ++- tests/blob/utils.test.ts | 80 ++++++++++++++++++- 5 files changed, 113 insertions(+), 18 deletions(-) diff --git a/src/blob/utils/utils.ts b/src/blob/utils/utils.ts index 8be2b5253..c258b4f86 100644 --- a/src/blob/utils/utils.ts +++ b/src/blob/utils/utils.ts @@ -35,7 +35,7 @@ export function isValidMd5Header(value: Uint8Array | string): boolean { */ export async function computeAndValidateTransactionalChecksums( stream: NodeJS.ReadableStream, - expected: { md5?: Uint8Array | string; crc64?: Uint8Array }, + expected: { md5?: Uint8Array | string; crc64?: Uint8Array | string }, contextId: string | undefined, force?: { md5?: boolean; crc64?: boolean } ): Promise<{ md5?: Uint8Array; crc64?: Uint8Array }> { @@ -45,32 +45,42 @@ export async function computeAndValidateTransactionalChecksums( if (expected.md5 !== undefined && !isValidMd5Header(expected.md5)) { throw StorageErrorFactory.getInvalidMd5(contextId); } - if (expected.crc64 !== undefined && Buffer.from(expected.crc64).length < 8) { + const expectedCrc64Bytes = + expected.crc64 === undefined + ? 
undefined + : typeof expected.crc64 === "string" + ? Buffer.from(expected.crc64, "base64") + : Buffer.from(expected.crc64); + + if (expectedCrc64Bytes !== undefined && expectedCrc64Bytes.length < 8) { // CRC-64/NVME is a 64-bit value; the wire format is base64-encoded bytes. // Verified against real Azure: <8 bytes is rejected as InvalidHeaderValue; // >=8 bytes is accepted at header-validation and falls through to a value // comparison (which then surfaces as Crc64Mismatch if it doesn't match). throw StorageErrorFactory.getInvalidHeaderValue(contextId, { HeaderName: "x-ms-content-crc64", - HeaderValue: Buffer.from(expected.crc64).toString("base64") + HeaderValue: expectedCrc64Bytes.toString("base64") }); } const calculated = await computeTransactionalChecksums(stream, expected, force); if (expected.md5 !== undefined) { - const expectedMd5 = + const expectedMd5Bytes = typeof expected.md5 === "string" - ? expected.md5 - : Buffer.from(expected.md5).toString("base64"); - const calculatedMd5 = Buffer.from(calculated.md5!).toString("base64"); - if (expectedMd5 !== calculatedMd5) { + ? 
Buffer.from(expected.md5, "base64") + : Buffer.from(expected.md5); + const calculatedMd5Bytes = Buffer.from(calculated.md5!); + if (!expectedMd5Bytes.equals(calculatedMd5Bytes)) { + const expectedMd5 = expectedMd5Bytes.toString("base64"); + const calculatedMd5 = calculatedMd5Bytes.toString("base64"); throw StorageErrorFactory.getMd5Mismatch(contextId, expectedMd5, calculatedMd5); } } - if (expected.crc64 !== undefined) { - const expectedCrc64 = Buffer.from(expected.crc64).toString("base64"); - const calculatedCrc64 = Buffer.from(calculated.crc64!).toString("base64"); - if (expectedCrc64 !== calculatedCrc64) { + if (expectedCrc64Bytes !== undefined) { + const calculatedCrc64Bytes = Buffer.from(calculated.crc64!); + if (!expectedCrc64Bytes.equals(calculatedCrc64Bytes)) { + const expectedCrc64 = expectedCrc64Bytes.toString("base64"); + const calculatedCrc64 = calculatedCrc64Bytes.toString("base64"); throw StorageErrorFactory.getCrc64Mismatch(contextId, expectedCrc64, calculatedCrc64); } } diff --git a/src/common/utils/utils.ts b/src/common/utils/utils.ts index be3ec5cf0..cbff27afb 100644 --- a/src/common/utils/utils.ts +++ b/src/common/utils/utils.ts @@ -275,7 +275,7 @@ export async function getCRC64FromStream( */ export async function computeTransactionalChecksums( stream: NodeJS.ReadableStream, - expected: { md5?: Uint8Array | string; crc64?: Uint8Array }, + expected: { md5?: Uint8Array | string; crc64?: Uint8Array | string }, force?: { md5?: boolean; crc64?: boolean } ): Promise<{ md5?: Uint8Array; crc64?: Uint8Array }> { const needMd5 = expected.md5 !== undefined || !!force?.md5; diff --git a/tests/blob/apis/blob.test.ts b/tests/blob/apis/blob.test.ts index 06f47d98a..d0831b694 100644 --- a/tests/blob/apis/blob.test.ts +++ b/tests/blob/apis/blob.test.ts @@ -2583,7 +2583,7 @@ describe("BlobAPIs", () => { it("upload invalid x-ms-blob-content-md5 @loki @sql", async () => { // Real Azure rejects a malformed x-ms-blob-content-md5 (not 16 bytes after - // base64 decode) 
with InvalidHeaderValue. + // base64 decode) with InvalidMd5. const pipeline = newPipeline( new StorageSharedKeyCredential( EMULATOR_ACCOUNT_NAME, @@ -2607,7 +2607,7 @@ describe("BlobAPIs", () => { assert.fail("Expected MD5 error"); } catch (err) { assert.deepStrictEqual((err as any).statusCode, 400); - assert.deepStrictEqual((err as any).code, "InvalidHeaderValue"); + assert.deepStrictEqual((err as any).code, "InvalidMd5"); } }); diff --git a/tests/blob/handlers/AppendBlobHandler.test.ts b/tests/blob/handlers/AppendBlobHandler.test.ts index 739d2a988..a8ef5a57f 100644 --- a/tests/blob/handlers/AppendBlobHandler.test.ts +++ b/tests/blob/handlers/AppendBlobHandler.test.ts @@ -94,6 +94,13 @@ describe("AppendBlobHandler", () => { // so we accept any PassThrough stream here. extentStore.appendExtent(anyOfClass(PassThrough), blobCtx.contextId) ).thenResolve(extent); + // appendBlock always re-reads the persisted extent to compute checksums, + // so provide a fresh readable stream for each invocation. 
+ when(extentStore.readExtent(extent, blobCtx.contextId)).thenCall(() => { + const readStream = new PassThrough(); + readStream.end(buffer); + return Promise.resolve(readStream); + }); describe("create", () => { it("accepts requests withContent-Length == 0 @loki", async () => { @@ -195,7 +202,7 @@ describe("AppendBlobHandler", () => { }); }); - it("rejects requests with invalid MD5 checksum @loki", async () => { + it("rejects requests with malformed MD5 checksum @loki", async () => { when(request.getHeader(HeaderConstants.CONTENT_MD5)).thenReturn( "d3JvbmdfTUQ1X2NoZWNrc3VtCg==" ); @@ -215,7 +222,7 @@ describe("AppendBlobHandler", () => { }, { name: "StorageError", - storageErrorCode: "Md5Mismatch" + storageErrorCode: "InvalidMd5" } ); }); diff --git a/tests/blob/utils.test.ts b/tests/blob/utils.test.ts index 75cfa12d9..ea15ad0a2 100644 --- a/tests/blob/utils.test.ts +++ b/tests/blob/utils.test.ts @@ -1,9 +1,11 @@ import * as assert from "assert"; import { PassThrough } from "stream"; +import { computeAndValidateTransactionalChecksums } from "../../src/blob/utils/utils"; import { convertRawHeadersToMetadata, getCRC64FromStream, - getCRC64FromString + getCRC64FromString, + getMD5FromString } from "../../src/common/utils/utils"; describe("Utils", () => { @@ -118,3 +120,79 @@ describe("CRC64", () => { assert.notDeepStrictEqual(Buffer.from(crc1), Buffer.from(crc2)); }); }); + +describe("Transactional Checksum Representation", () => { + function makeBodyStream(body: string): PassThrough { + const stream = new PassThrough(); + stream.end(Buffer.from(body)); + return stream; + } + + it("accepts non-canonical base64 MD5 that decodes to the same 16 bytes", async () => { + const body = "representation-md5-test"; + const md5 = await getMD5FromString(body); + const canonical = Buffer.from(md5).toString("base64"); + const nonCanonical = canonical.replace(/=+$/, ""); + + await assert.doesNotReject(async () => { + await computeAndValidateTransactionalChecksums( + 
makeBodyStream(body), + { md5: nonCanonical }, + "test-md5-noncanonical" + ); + }); + }); + + it("accepts non-canonical base64 CRC64 that decodes to the same 8 bytes", async () => { + const body = "representation-crc64-test"; + const crc64 = getCRC64FromString(body); + const canonical = Buffer.from(crc64).toString("base64"); + const nonCanonical = canonical.replace(/=+$/, ""); + + await assert.doesNotReject(async () => { + await computeAndValidateTransactionalChecksums( + makeBodyStream(body), + { crc64: nonCanonical }, + "test-crc64-noncanonical" + ); + }); + }); + + it("rejects malformed base64 MD5 that decodes to fewer than 16 bytes", async () => { + const body = "representation-md5-invalid-test"; + const malformedMd5 = Buffer.from([1, 2, 3, 4]).toString("base64").replace(/=+$/, ""); + + await assert.rejects( + async () => { + await computeAndValidateTransactionalChecksums( + makeBodyStream(body), + { md5: malformedMd5 }, + "test-md5-invalid" + ); + }, + { + name: "StorageError", + storageErrorCode: "InvalidMd5" + } + ); + }); + + it("rejects malformed base64 CRC64 that decodes to fewer than 8 bytes", async () => { + const body = "representation-crc64-invalid-test"; + const malformedCrc64 = Buffer.from([1, 2, 3, 4]).toString("base64").replace(/=+$/, ""); + + await assert.rejects( + async () => { + await computeAndValidateTransactionalChecksums( + makeBodyStream(body), + { crc64: malformedCrc64 }, + "test-crc64-invalid" + ); + }, + { + name: "StorageError", + storageErrorCode: "InvalidHeaderValue" + } + ); + }); +}); From 86d3ca00026387e8c7d339aef4e09cc793650b61 Mon Sep 17 00:00:00 2001 From: kristofr Date: Mon, 25 May 2026 12:00:35 +0200 Subject: [PATCH 11/11] Harden checksum header validation and test coverage --- package.json | 1 + src/blob/utils/utils.ts | 55 +++++++++++-- tests/blob/apis/blockblob.test.ts | 126 +++++++++++++++++++++--------- tests/blob/utils.test.ts | 53 +++++++++++-- tests/testutils.ts | 12 ++- 5 files changed, 194 insertions(+), 53 
deletions(-) diff --git a/package.json b/package.json index 6df18cfcb..b5f0f101c 100644 --- a/package.json +++ b/package.json @@ -317,6 +317,7 @@ "test:blob:in-memory": "npm run lint && cross-env AZURITE_TEST_INMEMORYPERSISTENCE=1 NODE_TLS_REJECT_UNAUTHORIZED=0 mocha --require ts-node/register --no-timeouts --grep @loki --recursive --exit tests/blob/*.test.ts tests/blob/**/*.test.ts", "test:blob:sql": "npm run lint && cross-env cross-env NODE_TLS_REJECT_UNAUTHORIZED=0 AZURITE_TEST_DB=mysql://root:my-secret-pw@127.0.0.1:3306/azurite_blob_test mocha --require ts-node/register --no-timeouts --grep @sql --recursive --exit tests/blob/*.test.ts tests/blob/**/*.test.ts", "test:blob:sql:ci": "npm run lint && cross-env cross-env NODE_TLS_REJECT_UNAUTHORIZED=0 AZURITE_TEST_DB=mysql://root:my-secret-pw@127.0.0.1:13306/azurite_blob_test mocha --require ts-node/register --no-timeouts --grep @sql --recursive --exit tests/blob/*.test.ts tests/blob/**/*.test.ts", + "test:blob:live": "cross-env NODE_TLS_REJECT_UNAUTHORIZED=0 mocha --require ts-node/register --no-timeouts --recursive --exit tests/blob/*.test.ts tests/blob/**/*.test.ts", "test:queue": "npm run lint && cross-env NODE_TLS_REJECT_UNAUTHORIZED=0 mocha --require ts-node/register --no-timeouts --recursive --exit tests/queue/*.test.ts tests/queue/**/*.test.ts", "test:queue:in-memory": "npm run lint && cross-env AZURITE_TEST_INMEMORYPERSISTENCE=1 NODE_TLS_REJECT_UNAUTHORIZED=0 mocha --require ts-node/register --no-timeouts --recursive --exit tests/queue/*.test.ts tests/queue/**/*.test.ts", "test:table": "npm run lint && cross-env NODE_TLS_REJECT_UNAUTHORIZED=0 mocha --require ts-node/register --no-timeouts --recursive --exit tests/table/**/*.test.ts", diff --git a/src/blob/utils/utils.ts b/src/blob/utils/utils.ts index c258b4f86..13876d129 100644 --- a/src/blob/utils/utils.ts +++ b/src/blob/utils/utils.ts @@ -6,6 +6,39 @@ import { BlobTag, BlobTags } from "@azure/storage-blob"; import { TagContent } from 
"../persistence/QueryInterpreter/QueryNodes/IQueryNode"; import { computeTransactionalChecksums } from "../../common/utils/utils"; +function decodeBase64HeaderValue(value: string): Buffer | undefined { + if (value.length === 0) { + return Buffer.alloc(0); + } + + // Allow missing padding, but reject non-base64 characters and misplaced '='. + if (!/^[A-Za-z0-9+/]*={0,2}$/.test(value)) { + return undefined; + } + + const firstPadding = value.indexOf("="); + if (firstPadding !== -1 && !/^=+$/.test(value.slice(firstPadding))) { + return undefined; + } + + const unpadded = value.replace(/=+$/, ""); + // Base64 payload length modulo 4 can only be 0, 2, or 3. + if (unpadded.length % 4 === 1) { + return undefined; + } + + const normalized = + unpadded + "=".repeat((4 - (unpadded.length % 4)) % 4); + const decoded = Buffer.from(normalized, "base64"); + + // Ensure the supplied payload is a valid base64 encoding for decoded bytes. + if (decoded.toString("base64").replace(/=+$/, "") !== unpadded) { + return undefined; + } + + return decoded; +} + /** * Decodes an MD5 header value (base64 string or raw Uint8Array) and returns * whether the result is exactly 16 bytes - the only shape real Azure accepts. @@ -15,9 +48,9 @@ import { computeTransactionalChecksums } from "../../common/utils/utils"; export function isValidMd5Header(value: Uint8Array | string): boolean { const bytes = typeof value === "string" - ? Buffer.from(value, "base64") + ? decodeBase64HeaderValue(value) : Buffer.from(value); - return bytes.length === 16; + return bytes !== undefined && bytes.length === 16; } /** @@ -45,21 +78,31 @@ export async function computeAndValidateTransactionalChecksums( if (expected.md5 !== undefined && !isValidMd5Header(expected.md5)) { throw StorageErrorFactory.getInvalidMd5(contextId); } + const expectedCrc64RawHeader = + typeof expected.crc64 === "string" + ? expected.crc64 + : expected.crc64 !== undefined + ? 
Buffer.from(expected.crc64).toString("base64") + : undefined; + const expectedCrc64Bytes = expected.crc64 === undefined ? undefined : typeof expected.crc64 === "string" - ? Buffer.from(expected.crc64, "base64") + ? decodeBase64HeaderValue(expected.crc64) : Buffer.from(expected.crc64); - if (expectedCrc64Bytes !== undefined && expectedCrc64Bytes.length < 8) { + if ( + expected.crc64 !== undefined && + (expectedCrc64Bytes === undefined || expectedCrc64Bytes.length < 8) + ) { // CRC-64/NVME is a 64-bit value; the wire format is base64-encoded bytes. // Verified against real Azure: <8 bytes is rejected as InvalidHeaderValue; // >=8 bytes is accepted at header-validation and falls through to a value // comparison (which then surfaces as Crc64Mismatch if it doesn't match). throw StorageErrorFactory.getInvalidHeaderValue(contextId, { HeaderName: "x-ms-content-crc64", - HeaderValue: expectedCrc64Bytes.toString("base64") + HeaderValue: expectedCrc64RawHeader ?? "" }); } const calculated = await computeTransactionalChecksums(stream, expected, force); @@ -67,7 +110,7 @@ export async function computeAndValidateTransactionalChecksums( if (expected.md5 !== undefined) { const expectedMd5Bytes = typeof expected.md5 === "string" - ? Buffer.from(expected.md5, "base64") + ? decodeBase64HeaderValue(expected.md5)! : Buffer.from(expected.md5); const calculatedMd5Bytes = Buffer.from(calculated.md5!); if (!expectedMd5Bytes.equals(calculatedMd5Bytes)) { diff --git a/tests/blob/apis/blockblob.test.ts b/tests/blob/apis/blockblob.test.ts index 13a64009b..5439168cf 100644 --- a/tests/blob/apis/blockblob.test.ts +++ b/tests/blob/apis/blockblob.test.ts @@ -76,6 +76,35 @@ describe("BlockBlobAPIs", () => { await containerClient.delete(); }); + // Temporary helper: The SDK's TypeScript wrapper does not expose some + // checksum headers on BlockBlobUploadOptions yet, so tests inject raw HTTP + // headers through a custom pipeline policy. 
Remove this once the TypeScript + // wrapper surfaces these headers on the public options type. + function getBlockBlobClientWithRawHeaders( + container: string, + blob: string, + headers: Array<{ key: string; value: string }> + ) { + const pipeline = newPipeline( + new StorageSharedKeyCredential( + EMULATOR_ACCOUNT_NAME, + EMULATOR_ACCOUNT_KEY + ), + { + retryOptions: { maxTries: 1 }, + keepAliveOptions: { enable: false } + } + ); + for (const header of headers) { + pipeline.factories.unshift( + new CustomHeaderPolicyFactory(header.key, header.value) + ); + } + + const customClient = new BlobServiceClient(baseURL, pipeline); + return customClient.getContainerClient(container).getBlockBlobClient(blob); + } + it("Block blob upload should refresh lease state @loki @sql", async () => { await blockBlobClient.upload('a', 1); @@ -146,17 +175,15 @@ describe("BlockBlobAPIs", () => { }); it("upload (PutBlob) with correct crc64 should succeed @loki @sql", async () => { - // BlockBlobClient.upload's runtime forwards transactionalContentCrc64 - // (via setUploadChecksumParameters), but the public BlockBlobUploadOptions - // interface omits the field - a TypeScript surface gap. Reach the - // generated context directly to bypass it. 
const body = "HelloWorld"; const crc64 = getCRC64FromString(body); - const result = await (blockBlobClient as any).blockBlobContext.upload( - body.length, - body, - { transactionalContentCrc64: new Uint8Array(crc64) } - ); + const clientWithCrc64 = getBlockBlobClientWithRawHeaders(containerName, blobName, [ + { + key: "x-ms-content-crc64", + value: Buffer.from(crc64).toString("base64") + } + ]); + const result = await clientWithCrc64.upload(body, body.length); assert.equal(result._response.status, 201); const downloaded = await blobClient.download(0); @@ -166,12 +193,14 @@ describe("BlockBlobAPIs", () => { it("upload (PutBlob) with wrong crc64 should throw mismatch @loki @sql", async () => { const body = "HelloWorld"; const wrongCrc64 = getCRC64FromString("differentBody"); + const clientWithWrongCrc64 = getBlockBlobClientWithRawHeaders(containerName, blobName, [ + { + key: "x-ms-content-crc64", + value: Buffer.from(wrongCrc64).toString("base64") + } + ]); try { - await (blockBlobClient as any).blockBlobContext.upload( - body.length, - body, - { transactionalContentCrc64: new Uint8Array(wrongCrc64) } - ); + await clientWithWrongCrc64.upload(body, body.length); } catch (e) { assert.equal(e.statusCode, 400); assert.equal(e.code, "Crc64Mismatch"); @@ -183,12 +212,14 @@ describe("BlockBlobAPIs", () => { it("upload (PutBlob) with wrong md5 should throw mismatch @loki @sql", async () => { const body = "HelloWorld"; const wrongMd5 = crypto.createHash("md5").update("differentBody", "utf8").digest(); + const clientWithWrongMd5 = getBlockBlobClientWithRawHeaders(containerName, blobName, [ + { + key: "content-md5", + value: Buffer.from(wrongMd5).toString("base64") + } + ]); try { - await (blockBlobClient as any).blockBlobContext.upload( - body.length, - body, - { transactionalContentMD5: new Uint8Array(wrongMd5) } - ); + await clientWithWrongMd5.upload(body, body.length); } catch (e) { assert.equal(e.statusCode, 400); assert.equal(e.code, "Md5Mismatch"); @@ -206,18 +237,36 @@ 
describe("BlockBlobAPIs", () => { const correctMd5 = crypto.createHash("md5").update(body, "utf8").digest(); const wrongMd5 = crypto.createHash("md5").update("differentBody", "utf8").digest(); + const clientWithWrongContentAndCorrectBlobMd5 = + getBlockBlobClientWithRawHeaders(containerName, blobName, [ + { + key: "content-md5", + value: Buffer.from(wrongMd5).toString("base64") + }, + { + key: "x-ms-blob-content-md5", + value: Buffer.from(correctMd5).toString("base64") + } + ]); + // Wrong transactional + correct blob-content-md5 -> success. - await (blockBlobClient as any).blockBlobContext.upload(body.length, body, { - transactionalContentMD5: new Uint8Array(wrongMd5), - blobHttpHeaders: { blobContentMD5: new Uint8Array(correctMd5) } - }); + await clientWithWrongContentAndCorrectBlobMd5.upload(body, body.length); + + const clientWithCorrectContentAndWrongBlobMd5 = + getBlockBlobClientWithRawHeaders(containerName, blobName, [ + { + key: "content-md5", + value: Buffer.from(correctMd5).toString("base64") + }, + { + key: "x-ms-blob-content-md5", + value: Buffer.from(wrongMd5).toString("base64") + } + ]); // Correct transactional + wrong blob-content-md5 -> Md5Mismatch. 
try { - await (blockBlobClient as any).blockBlobContext.upload(body.length, body, { - transactionalContentMD5: new Uint8Array(correctMd5), - blobHttpHeaders: { blobContentMD5: new Uint8Array(wrongMd5) } - }); + await clientWithCorrectContentAndWrongBlobMd5.upload(body, body.length); assert.fail("Expected Md5Mismatch when x-ms-blob-content-md5 is wrong."); } catch (e) { assert.equal(e.statusCode, 400); @@ -233,8 +282,8 @@ describe("BlockBlobAPIs", () => { const body = "HelloWorld"; const wrongLength = new Uint8Array([0, 0, 0, 0]); try { - await (blockBlobClient as any).blockBlobContext.upload(body.length, body, { - blobHttpHeaders: { blobContentMD5: wrongLength } + await blockBlobClient.upload(body, body.length, { + blobHTTPHeaders: { blobContentMD5: wrongLength } }); } catch (e: any) { assert.equal(e.statusCode, 400); @@ -250,15 +299,18 @@ describe("BlockBlobAPIs", () => { const body = "HelloWorld"; const md5 = crypto.createHash("md5").update(body, "utf8").digest(); const crc64 = getCRC64FromString(body); + const clientWithCrc64AndMd5 = getBlockBlobClientWithRawHeaders(containerName, blobName, [ + { + key: "x-ms-content-crc64", + value: Buffer.from(crc64).toString("base64") + }, + { + key: "content-md5", + value: Buffer.from(md5).toString("base64") + } + ]); try { - await (blockBlobClient as any).blockBlobContext.upload( - body.length, - body, - { - transactionalContentMD5: new Uint8Array(md5), - transactionalContentCrc64: new Uint8Array(crc64) - } - ); + await clientWithCrc64AndMd5.upload(body, body.length); } catch (e) { assert.equal(e.statusCode, 400); assert.equal(e.code, "BothCrc64AndMd5HeaderPresent"); diff --git a/tests/blob/utils.test.ts b/tests/blob/utils.test.ts index ea15ad0a2..58588acb8 100644 --- a/tests/blob/utils.test.ts +++ b/tests/blob/utils.test.ts @@ -108,14 +108,19 @@ describe("CRC64", () => { assert.deepStrictEqual(Buffer.from(fromChunked), Buffer.from(expected)); }); - it("getCRC64FromString produces distinct values for inputs that differ by a 
single byte", () => { + it("getCRC64FromStream produces distinct values for payloads that differ by a single byte", async () => { // Verifies the avalanche property: a one-byte change must alter the checksum. const base = Buffer.from("block content for crc64 test"); - const mutated = Buffer.from(base); + const mutated = Buffer.from([...base]); mutated[mutated.length - 1] ^= 0x01; - const crc1 = getCRC64FromString(base.toString("latin1")); - const crc2 = getCRC64FromString(mutated.toString("latin1")); + const baseStream = new PassThrough(); + baseStream.end(base); + const mutatedStream = new PassThrough(); + mutatedStream.end(mutated); + + const crc1 = await getCRC64FromStream(baseStream); + const crc2 = await getCRC64FromStream(mutatedStream); assert.notDeepStrictEqual(Buffer.from(crc1), Buffer.from(crc2)); }); @@ -158,7 +163,7 @@ describe("Transactional Checksum Representation", () => { }); }); - it("rejects malformed base64 MD5 that decodes to fewer than 16 bytes", async () => { + it("rejects wrong-length MD5 that decodes to fewer than 16 bytes", async () => { const body = "representation-md5-invalid-test"; const malformedMd5 = Buffer.from([1, 2, 3, 4]).toString("base64").replace(/=+$/, ""); @@ -177,7 +182,7 @@ describe("Transactional Checksum Representation", () => { ); }); - it("rejects malformed base64 CRC64 that decodes to fewer than 8 bytes", async () => { + it("rejects wrong-length CRC64 that decodes to fewer than 8 bytes", async () => { const body = "representation-crc64-invalid-test"; const malformedCrc64 = Buffer.from([1, 2, 3, 4]).toString("base64").replace(/=+$/, ""); @@ -195,4 +200,40 @@ describe("Transactional Checksum Representation", () => { } ); }); + + it("rejects invalid-base64 MD5 with non-base64 characters", async () => { + const body = "representation-md5-invalid-base64"; + + await assert.rejects( + async () => { + await computeAndValidateTransactionalChecksums( + makeBodyStream(body), + { md5: "invalid_@md5" }, + "test-md5-invalid-base64" + 
); + }, + { + name: "StorageError", + storageErrorCode: "InvalidMd5" + } + ); + }); + + it("rejects invalid-base64 CRC64 with non-base64 characters", async () => { + const body = "representation-crc64-invalid-base64"; + + await assert.rejects( + async () => { + await computeAndValidateTransactionalChecksums( + makeBodyStream(body), + { crc64: "invalid_@crc64" }, + "test-crc64-invalid-base64" + ); + }, + { + name: "StorageError", + storageErrorCode: "InvalidHeaderValue" + } + ); + }); }); diff --git a/tests/testutils.ts b/tests/testutils.ts index 0e985c154..a0c6cbf77 100644 --- a/tests/testutils.ts +++ b/tests/testutils.ts @@ -5,6 +5,10 @@ import { sign } from "jsonwebtoken"; import { join } from "path"; import rimraf from "rimraf"; import { URL } from "url"; +import { + EMULATOR_ACCOUNT_KEY_STR as DEFAULT_EMULATOR_ACCOUNT_KEY_STR, + EMULATOR_ACCOUNT_NAME as DEFAULT_EMULATOR_ACCOUNT_NAME +} from "../src/blob/utils/constants"; // ---- Live Azure mode ------------------------------------------------------- // @@ -52,10 +56,10 @@ const liveConfig = liveConnectionString : undefined; export const EMULATOR_ACCOUNT_NAME = - liveConfig?.accountName ?? "devstoreaccount1"; + liveConfig?.accountName ?? DEFAULT_EMULATOR_ACCOUNT_NAME; export const EMULATOR_ACCOUNT_KEY = liveConfig?.accountKey ?? - "Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw=="; + DEFAULT_EMULATOR_ACCOUNT_KEY_STR; /** * Builds the blob service base URL for a test fixture. 
In emulator mode this @@ -221,7 +225,7 @@ export async function createRandomLocalFile( ws.on("open", () => { // tslint:disable-next-line:no-empty - while (offsetInMB++ < blockNumber && ws.write(randomValueHex())) {} + while (offsetInMB++ < blockNumber && ws.write(randomValueHex())) { } if (offsetInMB >= blockNumber) { ws.end(); } @@ -229,7 +233,7 @@ export async function createRandomLocalFile( ws.on("drain", () => { // tslint:disable-next-line:no-empty - while (offsetInMB++ < blockNumber && ws.write(randomValueHex())) {} + while (offsetInMB++ < blockNumber && ws.write(randomValueHex())) { } if (offsetInMB >= blockNumber) { ws.end(); }