diff --git a/package.json b/package.json index 6df18cfcb..b5f0f101c 100644 --- a/package.json +++ b/package.json @@ -317,6 +317,7 @@ "test:blob:in-memory": "npm run lint && cross-env AZURITE_TEST_INMEMORYPERSISTENCE=1 NODE_TLS_REJECT_UNAUTHORIZED=0 mocha --require ts-node/register --no-timeouts --grep @loki --recursive --exit tests/blob/*.test.ts tests/blob/**/*.test.ts", "test:blob:sql": "npm run lint && cross-env cross-env NODE_TLS_REJECT_UNAUTHORIZED=0 AZURITE_TEST_DB=mysql://root:my-secret-pw@127.0.0.1:3306/azurite_blob_test mocha --require ts-node/register --no-timeouts --grep @sql --recursive --exit tests/blob/*.test.ts tests/blob/**/*.test.ts", "test:blob:sql:ci": "npm run lint && cross-env cross-env NODE_TLS_REJECT_UNAUTHORIZED=0 AZURITE_TEST_DB=mysql://root:my-secret-pw@127.0.0.1:13306/azurite_blob_test mocha --require ts-node/register --no-timeouts --grep @sql --recursive --exit tests/blob/*.test.ts tests/blob/**/*.test.ts", + "test:blob:live": "cross-env NODE_TLS_REJECT_UNAUTHORIZED=0 mocha --require ts-node/register --no-timeouts --recursive --exit tests/blob/*.test.ts tests/blob/**/*.test.ts", "test:queue": "npm run lint && cross-env NODE_TLS_REJECT_UNAUTHORIZED=0 mocha --require ts-node/register --no-timeouts --recursive --exit tests/queue/*.test.ts tests/queue/**/*.test.ts", "test:queue:in-memory": "npm run lint && cross-env AZURITE_TEST_INMEMORYPERSISTENCE=1 NODE_TLS_REJECT_UNAUTHORIZED=0 mocha --require ts-node/register --no-timeouts --recursive --exit tests/queue/*.test.ts tests/queue/**/*.test.ts", "test:table": "npm run lint && cross-env NODE_TLS_REJECT_UNAUTHORIZED=0 mocha --require ts-node/register --no-timeouts --recursive --exit tests/table/**/*.test.ts", diff --git a/src/blob/errors/StorageErrorFactory.ts b/src/blob/errors/StorageErrorFactory.ts index c44776fb5..a0c4e897b 100644 --- a/src/blob/errors/StorageErrorFactory.ts +++ b/src/blob/errors/StorageErrorFactory.ts @@ -190,6 +190,43 @@ export default class StorageErrorFactory { ); } + 
/**
 * Builds the HTTP 400 "Crc64Mismatch" storage error.
 *
 * @param contextID Request context id; falls back to DefaultID when undefined.
 * @param userSpecifiedCrc64 CRC64 value taken from the request.
 * @param serverCalculatedCrc64 CRC64 value computed by the server.
 * @returns A StorageError carrying both values as additional details.
 */
public static getCrc64Mismatch(
  contextID: string = DefaultID,
  userSpecifiedCrc64: string,
  serverCalculatedCrc64: string
): StorageError {
  const message =
    "The CRC64 value specified in the request did not match with the CRC64 value calculated by the server.";
  const checksumDetails = {
    UserSpecifiedCrc64: userSpecifiedCrc64,
    ServerCalculatedCrc64: serverCalculatedCrc64
  };
  return new StorageError(
    400,
    "Crc64Mismatch",
    message,
    contextID,
    checksumDetails
  );
}

/**
 * Builds the HTTP 400 "BothCrc64AndMd5HeaderPresent" storage error, used when
 * a request carries both an x-ms-content-crc64 and a Content-MD5 header.
 *
 * @param contextID Request context id; falls back to DefaultID when undefined.
 */
public static getBothCrc64AndMd5HeaderPresent(
  contextID: string = DefaultID
): StorageError {
  const message =
    "Both x-ms-content-crc64 header and Content-MD5 header are present.";
  return new StorageError(
    400,
    "BothCrc64AndMd5HeaderPresent",
    message,
    contextID
  );
}

/**
 * Builds the HTTP 400 "InvalidMd5" storage error for an MD5 value that is not
 * a Base64-encoded 128-bit digest.
 *
 * @param contextID Request context id; falls back to DefaultID when undefined.
 */
public static getInvalidMd5(contextID: string = DefaultID): StorageError {
  const message =
    "The MD5 value specified in the request is invalid. The MD5 value must be 128 bits and Base64-encoded.";
  return new StorageError(400, "InvalidMd5", message, contextID);
}
"./BaseHandler"; export default class AppendBlobHandler extends BaseHandler @@ -149,38 +151,28 @@ export default class AppendBlobHandler extends BaseHandler ); } - // MD5 + // MD5 and/or CRC64 transactional integrity validation const contentMD5 = blobCtx.request!.getHeader(HeaderConstants.CONTENT_MD5); + const contentCRC64 = options.transactionalContentCrc64; let contentMD5Buffer; - let contentMD5String; if (contentMD5 !== undefined) { contentMD5Buffer = typeof contentMD5 === "string" ? Buffer.from(contentMD5, "base64") : contentMD5; - contentMD5String = - typeof contentMD5 === "string" - ? contentMD5 - : contentMD5Buffer.toString("base64"); + } - const stream = await this.extentStore.readExtent( - extent, - blobCtx.contextId + // Per the Append Block REST contract, the service always computes a CRC64 + // of the appended block and returns it in x-ms-content-crc64. + const stream = await this.extentStore.readExtent(extent, blobCtx.contextId); + const { crc64: calculatedCRC64 } = + await computeAndValidateTransactionalChecksums( + stream, + { md5: contentMD5, crc64: contentCRC64 }, + context.contextId, + { crc64: true } ); - const calculatedContentMD5Buffer = await getMD5FromStream(stream); - const calculatedContentMD5String = Buffer.from( - calculatedContentMD5Buffer - ).toString("base64"); - - if (contentMD5String !== calculatedContentMD5String) { - throw StorageErrorFactory.getMd5Mismatch( - context.contextId, - contentMD5String, - calculatedContentMD5String - ); - } - } const originOffset = blob.properties.contentLength; @@ -206,7 +198,7 @@ export default class AppendBlobHandler extends BaseHandler eTag: properties.etag, lastModified: properties.lastModified, contentMD5: contentMD5Buffer, - xMsContentCrc64: undefined, + xMsContentCrc64: calculatedCRC64, clientRequestId: options.requestId, version: BLOB_API_VERSION, date, diff --git a/src/blob/handlers/BlobHandler.ts b/src/blob/handlers/BlobHandler.ts index 0ab7be045..1ee4b9bc6 100644 --- 
a/src/blob/handlers/BlobHandler.ts +++ b/src/blob/handlers/BlobHandler.ts @@ -907,6 +907,9 @@ export default class BlobHandler extends BaseHandler implements IBlobHandler { date: context.startTime, copyId: res.copyId, copyStatus, + // Per the Copy Blob From URL REST contract, echo the source's Content-MD5 + // back to the client when it was supplied in x-ms-source-content-md5. + contentMD5: options.sourceContentMD5, clientRequestId: options.requestId }; diff --git a/src/blob/handlers/BlockBlobHandler.ts b/src/blob/handlers/BlockBlobHandler.ts index ec9b11a12..08f54e708 100644 --- a/src/blob/handlers/BlockBlobHandler.ts +++ b/src/blob/handlers/BlockBlobHandler.ts @@ -1,6 +1,5 @@ -import { convertRawHeadersToMetadata } from "../../common/utils/utils"; import { - getMD5FromStream, + convertRawHeadersToMetadata, getMD5FromString, newEtag } from "../../common/utils/utils"; @@ -14,7 +13,10 @@ import { parseXML } from "../generated/utils/xml"; import { BlobModel, BlockModel } from "../persistence/IBlobMetadataStore"; import { BLOB_API_VERSION } from "../utils/constants"; import BaseHandler from "./BaseHandler"; -import { getTagsFromString } from "../utils/utils"; +import { + computeAndValidateTransactionalChecksums, + getTagsFromString +} from "../utils/utils"; /** * BlobHandler handles Azure Storage BlockBlob related requests. @@ -45,11 +47,19 @@ export default class BlockBlobHandler options.blobHTTPHeaders.blobContentType || context.request!.getHeader("content-type") || "application/octet-stream"; - const contentMD5 = context.request!.getHeader("content-md5") - || context.request!.getHeader("x-ms-blob-content-md5") - ? options.blobHTTPHeaders.blobContentMD5 || - context.request!.getHeader("content-md5") - : undefined; + + // Per the Put Blob REST contract, x-ms-blob-content-md5 takes precedence + // over Content-MD5 for transit integrity verification on BlockBlob. + // Verified live. 
Prefer the SDK-parsed blobContentMD5 option; fall back + // to the raw x-ms-blob-content-md5 header (for clients that inject it + // directly without going through the SDK option); finally fall back to + // Content-MD5. Malformed values are rejected as InvalidMd5 by the + // unified validator below (matches real Azure for all three sources). + const contentMD5 = + options.blobHTTPHeaders.blobContentMD5 + ?? context.request!.getHeader("x-ms-blob-content-md5") + ?? context.request!.getHeader("content-md5"); + const contentCRC64 = options.transactionalContentCrc64; await this.metadataStore.checkContainerExist( context, @@ -68,32 +78,19 @@ export default class BlockBlobHandler ); } - // Calculate MD5 for validation + // MD5 is always needed (persisted as the blob's contentMD5 property); + // CRC64 is computed in the same pass only when the client supplied one. const stream = await this.extentStore.readExtent( persistency, context.contextId ); - const calculatedContentMD5 = await getMD5FromStream(stream); - if (contentMD5 !== undefined) { - if (typeof contentMD5 === "string") { - const calculatedContentMD5String = Buffer.from( - calculatedContentMD5 - ).toString("base64"); - if (contentMD5 !== calculatedContentMD5String) { - throw StorageErrorFactory.getInvalidOperation( - context.contextId!, - "Provided contentMD5 doesn't match." - ); - } - } else { - if (!Buffer.from(contentMD5).equals(calculatedContentMD5)) { - throw StorageErrorFactory.getInvalidOperation( - context.contextId!, - "Provided contentMD5 doesn't match." 
- ); - } - } - } + const { md5: calculatedContentMD5 } = + await computeAndValidateTransactionalChecksums( + stream, + { md5: contentMD5, crc64: contentCRC64 }, + context.contextId, + { md5: true } + ); const blob: BlobModel = { deleted: false, @@ -179,14 +176,15 @@ export default class BlockBlobHandler const blobName = blobCtx.blob!; const date = blobCtx.startTime!; - // stageBlock operation doesn't have blobHTTPHeaders + // stageBlock operation doesn't accept blob property headers per the + // Put Block REST contract: only Content-MD5 and x-ms-content-crc64 are + // honored. Verified live: real Azure silently ignores x-ms-blob-content-md5 + // here (even malformed values), so don't use it as a fallback source. // https://learn.microsoft.com/en-us/rest/api/storageservices/put-block - // options.blobHTTPHeaders = options.blobHTTPHeaders || {}; - const contentMD5 = context.request!.getHeader("content-md5") - || context.request!.getHeader("x-ms-blob-content-md5") - ? options.transactionalContentMD5 || - context.request!.getHeader("content-md5") - : undefined; + const contentMD5 = + options.transactionalContentMD5 || + context.request!.getHeader("content-md5"); + const contentCRC64 = options.transactionalContentCrc64; this.validateBlockId(blockId, blobCtx); @@ -208,32 +206,22 @@ export default class BlockBlobHandler ); } - // Calculate MD5 for validation + // Per the Put Block REST contract, the service computes a CRC64 of the + // staged block and echoes it back in x-ms-content-crc64 unless the client + // supplied a Content-MD5 (Azure rejects supplying both). Compute CRC64 + // whenever no MD5 was supplied, regardless of whether the client supplied + // a CRC64 themselves. 
const stream = await this.extentStore.readExtent( persistency, context.contextId ); - const calculatedContentMD5 = await getMD5FromStream(stream); - if (contentMD5 !== undefined) { - if (typeof contentMD5 === "string") { - const calculatedContentMD5String = Buffer.from( - calculatedContentMD5 - ).toString("base64"); - if (contentMD5 !== calculatedContentMD5String) { - throw StorageErrorFactory.getInvalidOperation( - context.contextId!, - "Provided contentMD5 doesn't match." - ); - } - } else { - if (!Buffer.from(contentMD5).equals(calculatedContentMD5)) { - throw StorageErrorFactory.getInvalidOperation( - context.contextId!, - "Provided contentMD5 doesn't match." - ); - } - } - } + const { crc64: calculatedCRC64 } = + await computeAndValidateTransactionalChecksums( + stream, + { md5: contentMD5, crc64: contentCRC64 }, + context.contextId, + { crc64: contentMD5 === undefined } + ); const block: BlockModel = { accountName, @@ -255,6 +243,7 @@ export default class BlockBlobHandler const response: Models.BlockBlobStageBlockResponse = { statusCode: 201, contentMD5: undefined, // TODO: Block content MD5 + xMsContentCrc64: calculatedCRC64, requestId: blobCtx.contextId, version: BLOB_API_VERSION, date, diff --git a/src/blob/handlers/PageBlobHandler.ts b/src/blob/handlers/PageBlobHandler.ts index 429d860ef..0a68a80e3 100644 --- a/src/blob/handlers/PageBlobHandler.ts +++ b/src/blob/handlers/PageBlobHandler.ts @@ -12,8 +12,12 @@ import BlobWriteLeaseValidator from "../lease/BlobWriteLeaseValidator"; import IBlobMetadataStore, { BlobModel } from "../persistence/IBlobMetadataStore"; -import { BLOB_API_VERSION } from "../utils/constants"; -import { deserializePageBlobRangeHeader, getTagsFromString } from "../utils/utils"; +import { BLOB_API_VERSION, HeaderConstants } from "../utils/constants"; +import { + computeAndValidateTransactionalChecksums, + deserializePageBlobRangeHeader, + getTagsFromString +} from "../utils/utils"; import BaseHandler from "./BaseHandler"; import 
IPageBlobRangesManager from "./IPageBlobRangesManager"; @@ -236,6 +240,19 @@ export default class PageBlobHandler extends BaseHandler ); } + // Transactional integrity validation. Real Azure always returns a + // server-computed x-ms-content-crc64 on Put Page; force CRC64 always. + const contentMD5 = blobCtx.request!.getHeader(HeaderConstants.CONTENT_MD5); + const contentCRC64 = options.transactionalContentCrc64; + const stream = await this.extentStore.readExtent(persistency, blobCtx.contextId); + const { crc64: calculatedCRC64 } = + await computeAndValidateTransactionalChecksums( + stream, + { md5: contentMD5, crc64: contentCRC64 }, + context.contextId, + { crc64: true } + ); + const res = await this.metadataStore.uploadPages( context, blob, @@ -251,7 +268,12 @@ export default class PageBlobHandler extends BaseHandler statusCode: 201, eTag: res.etag, lastModified: date, - contentMD5: undefined, // TODO + contentMD5: contentMD5 === undefined + ? undefined + : typeof contentMD5 === "string" + ? 
/**
 * Strictly decodes a base64 header value.
 *
 * Unlike a bare `Buffer.from(value, "base64")`, which silently skips invalid
 * characters, this rejects anything that is not a canonical base64 payload.
 * Missing trailing padding is tolerated.
 *
 * @param value Raw header text.
 * @returns The decoded bytes (an empty Buffer for an empty string), or
 *   `undefined` when the text is not valid base64.
 */
function decodeBase64HeaderValue(value: string): Buffer | undefined {
  if (value.length === 0) {
    return Buffer.alloc(0);
  }

  // The pattern is anchored: base64 alphabet only, then at most two '='.
  // That already rules out '=' anywhere but the tail, so no separate
  // "misplaced padding" check is needed.
  if (!/^[A-Za-z0-9+/]*={0,2}$/.test(value)) {
    return undefined;
  }

  const unpadded = value.replace(/=+$/, "");
  // A base64 payload length modulo 4 can only be 0, 2, or 3.
  if (unpadded.length % 4 === 1) {
    return undefined;
  }

  const normalized = unpadded + "=".repeat((4 - (unpadded.length % 4)) % 4);
  const decoded = Buffer.from(normalized, "base64");

  // Round-trip check: rejects non-canonical encodings whose unused trailing
  // bits are non-zero (they would decode to bytes that re-encode differently).
  if (decoded.toString("base64").replace(/=+$/, "") !== unpadded) {
    return undefined;
  }

  return decoded;
}

/**
 * Reports whether an MD5 header value decodes to exactly 16 bytes.
 *
 * @param value Base64 text (decoded with `decodeBase64HeaderValue`) or raw bytes.
 * @returns `true` only when the value is decodable and 16 bytes long.
 */
export function isValidMd5Header(value: Uint8Array | string): boolean {
  const bytes =
    typeof value === "string"
      ? decodeBase64HeaderValue(value)
      : Buffer.from(value);
  return bytes !== undefined && bytes.length === 16;
}
/**
 * Computes MD5 and/or CRC-64/NVME over `stream` in a single pass and validates
 * the result against the request-supplied values.
 *
 * A checksum is computed when its `expected` value is provided, or when the
 * matching `force` flag is set (for callers that need the value for something
 * other than validation).
 *
 * @param stream Payload to hash; it is consumed by this call.
 * @param expected Request-supplied checksums, as base64 text or raw bytes.
 * @param contextId Request context id forwarded to the error factory.
 * @param force Checksums to compute even when no expected value was supplied.
 * @returns The checksums that were computed; the others are `undefined`.
 * @throws BothCrc64AndMd5HeaderPresent when both expected values are supplied.
 * @throws InvalidMd5 when the expected MD5 does not decode to 16 bytes.
 * @throws InvalidHeaderValue when the expected CRC64 is undecodable or
 *   shorter than 8 bytes.
 * @throws Md5Mismatch / Crc64Mismatch when a computed checksum differs.
 */
export async function computeAndValidateTransactionalChecksums(
  stream: NodeJS.ReadableStream,
  expected: { md5?: Uint8Array | string; crc64?: Uint8Array | string },
  contextId: string | undefined,
  force?: { md5?: boolean; crc64?: boolean }
): Promise<{ md5?: Uint8Array; crc64?: Uint8Array }> {
  const { md5: suppliedMd5, crc64: suppliedCrc64 } = expected;

  if (suppliedMd5 !== undefined && suppliedCrc64 !== undefined) {
    throw StorageErrorFactory.getBothCrc64AndMd5HeaderPresent(contextId);
  }

  // Decode each supplied checksum exactly once; the decoded bytes are reused
  // for both header validation and the comparison below.
  let expectedMd5Bytes: Buffer | undefined;
  if (suppliedMd5 !== undefined) {
    expectedMd5Bytes =
      typeof suppliedMd5 === "string"
        ? decodeBase64HeaderValue(suppliedMd5)
        : Buffer.from(suppliedMd5);
    if (expectedMd5Bytes === undefined || expectedMd5Bytes.length !== 16) {
      throw StorageErrorFactory.getInvalidMd5(contextId);
    }
  }

  let expectedCrc64Bytes: Buffer | undefined;
  if (suppliedCrc64 !== undefined) {
    expectedCrc64Bytes =
      typeof suppliedCrc64 === "string"
        ? decodeBase64HeaderValue(suppliedCrc64)
        : Buffer.from(suppliedCrc64);
    // CRC-64/NVME is a 64-bit value. Fewer than 8 bytes is rejected as an
    // invalid header; 8 or more bytes falls through to the value comparison
    // (and surfaces as Crc64Mismatch if it does not match).
    if (expectedCrc64Bytes === undefined || expectedCrc64Bytes.length < 8) {
      throw StorageErrorFactory.getInvalidHeaderValue(contextId, {
        HeaderName: "x-ms-content-crc64",
        HeaderValue:
          typeof suppliedCrc64 === "string"
            ? suppliedCrc64
            : Buffer.from(suppliedCrc64).toString("base64")
      });
    }
  }

  const calculated = await computeTransactionalChecksums(stream, expected, force);

  if (expectedMd5Bytes !== undefined) {
    // An absent computed digest becomes an empty buffer, which can never equal
    // the 16-byte expectation, so validation fails closed.
    const calculatedMd5Bytes = Buffer.from(calculated.md5 ?? new Uint8Array(0));
    if (!expectedMd5Bytes.equals(calculatedMd5Bytes)) {
      throw StorageErrorFactory.getMd5Mismatch(
        contextId,
        expectedMd5Bytes.toString("base64"),
        calculatedMd5Bytes.toString("base64")
      );
    }
  }

  if (expectedCrc64Bytes !== undefined) {
    const calculatedCrc64Bytes = Buffer.from(
      calculated.crc64 ?? new Uint8Array(0)
    );
    if (!expectedCrc64Bytes.equals(calculatedCrc64Bytes)) {
      throw StorageErrorFactory.getCrc64Mismatch(
        contextId,
        expectedCrc64Bytes.toString("base64"),
        calculatedCrc64Bytes.toString("base64")
      );
    }
  }

  return calculated;
}

// From src/common/utils/utils.ts (these share the physical line with the end
// of the function above).
//
// CRC-64/NVME parameters for x-ms-content-crc64 transactional integrity checks:
//   width  = 64
//   poly   = 0xad93d23594c93659 (reflected form: 0x9a6c9329ac4bc9b5)
//   init   = 0xffffffffffffffff
//   refin  = true
//   refout = true
//   xorout = 0xffffffffffffffff
//   check  = 0xae8b14860a799888 ("123456789")
//
// The 64-bit value is kept as two 32-bit halves (hi, lo) so BigInt is not
// needed. This is a reflected (right-shift) CRC, so `lo` holds the bits
// consumed by the next input byte.
const CRC64_POLY_HI = 0x9a6c9329;
const CRC64_POLY_LO = 0xac4bc9b5;
// Lookup table for the reflected CRC-64/NVME polynomial, stored flat:
// entry i occupies [i * 2] (high 32 bits) and [i * 2 + 1] (low 32 bits).
const CRC64_TABLE: readonly number[] = (() => {
  // Reflected polynomial 0x9a6c9329ac4bc9b5 as two 32-bit halves. Scoped to
  // the builder because nothing else reads it.
  const polyHi = 0x9a6c9329;
  const polyLo = 0xac4bc9b5;

  const table: number[] = new Array(512);
  for (let i = 0; i < 256; i++) {
    let hi = 0;
    let lo = i;
    for (let bit = 0; bit < 8; bit++) {
      const lowBitSet = (lo & 1) !== 0;
      // 64-bit logical shift right by one across the two halves; `>>> 0`
      // keeps each half an unsigned 32-bit number.
      lo = (((hi & 1) << 31) | (lo >>> 1)) >>> 0;
      hi = hi >>> 1;
      if (lowBitSet) {
        hi = (hi ^ polyHi) >>> 0;
        lo = (lo ^ polyLo) >>> 0;
      }
    }
    table[i * 2] = hi;
    table[i * 2 + 1] = lo;
  }
  return table;
})();

/**
 * Folds `chunk` into a running CRC state held as two unsigned 32-bit halves.
 *
 * @returns The updated `[hi, lo]` state (before the final xor).
 */
function crc64Accumulate(
  crcHi: number,
  crcLo: number,
  chunk: Uint8Array
): [number, number] {
  for (let i = 0; i < chunk.length; i++) {
    const index = (crcLo ^ chunk[i]) & 0xff;
    const tHi = CRC64_TABLE[index * 2];
    const tLo = CRC64_TABLE[index * 2 + 1];
    // 64-bit shift right by 8: the low byte of `hi` moves to the top of `lo`.
    const newLo = ((crcHi & 0xff) << 24) | (crcLo >>> 8);
    const newHi = crcHi >>> 8;
    crcHi = (newHi ^ tHi) >>> 0;
    crcLo = (newLo ^ tLo) >>> 0;
  }
  return [crcHi, crcLo];
}

// Initial CRC state: 0xFFFFFFFF_FFFFFFFF.
const CRC64_INIT_HI = 0xffffffff;
const CRC64_INIT_LO = 0xffffffff;

/**
 * Applies the final xor (0xFFFFFFFFFFFFFFFF) and serializes the CRC as 8
 * bytes, little-endian (least significant byte first).
 */
function crc64ToUint8Array(hi: number, lo: number): Uint8Array {
  // Every byte is written below, so allocUnsafe is safe here.
  const buf = Buffer.allocUnsafe(8);
  buf.writeUInt32LE((lo ^ 0xffffffff) >>> 0, 0);
  buf.writeUInt32LE((hi ^ 0xffffffff) >>> 0, 4);
  return buf;
}

/**
 * Computes the CRC-64/NVME of `text` (encoded by `Buffer.from(text)`).
 *
 * @returns 8 bytes, little-endian.
 */
export function getCRC64FromString(text: string): Uint8Array {
  const [hi, lo] = crc64Accumulate(
    CRC64_INIT_HI,
    CRC64_INIT_LO,
    Buffer.from(text)
  );
  return crc64ToUint8Array(hi, lo);
}

/**
 * Computes the CRC-64/NVME of everything emitted by `stream`.
 *
 * @param stream Source stream; it is consumed by this call.
 * @returns 8 bytes, little-endian. Rejects if the stream emits "error".
 */
export async function getCRC64FromStream(
  stream: NodeJS.ReadableStream
): Promise<Uint8Array> {
  return new Promise<Uint8Array>((resolve, reject) => {
    let hi = CRC64_INIT_HI;
    let lo = CRC64_INIT_LO;
    stream
      .on("data", (chunk: Buffer | string) => {
        const data = Buffer.isBuffer(chunk) ? chunk : Buffer.from(chunk);
        [hi, lo] = crc64Accumulate(hi, lo, data);
      })
      .on("end", () => {
        resolve(crc64ToUint8Array(hi, lo));
      })
      .on("error", reject);
  });
}

/**
 * Computes MD5 and/or CRC-64/NVME in a single pass over `stream`.
 *
 * A checksum is computed when the matching `expected` value is provided or
 * when `force` is set for that field; otherwise it is returned as `undefined`.
 * Only the presence of the `expected` values matters here - comparing them is
 * the caller's job.
 *
 * @param stream Source stream; it is consumed by this call.
 * @param expected Request-supplied checksums (presence only).
 * @param force Checksums to compute regardless of `expected`.
 * @returns The computed checksums. Rejects if the stream emits "error".
 */
export async function computeTransactionalChecksums(
  stream: NodeJS.ReadableStream,
  expected: { md5?: Uint8Array | string; crc64?: Uint8Array | string },
  force?: { md5?: boolean; crc64?: boolean }
): Promise<{ md5?: Uint8Array; crc64?: Uint8Array }> {
  const needMd5 = expected.md5 !== undefined || !!force?.md5;
  const needCrc64 = expected.crc64 !== undefined || !!force?.crc64;
  const hash = needMd5 ? createHash("md5") : undefined;

  return new Promise<{ md5?: Uint8Array; crc64?: Uint8Array }>(
    (resolve, reject) => {
      let hi = CRC64_INIT_HI;
      let lo = CRC64_INIT_LO;
      stream
        .on("data", (chunk: Buffer | string) => {
          const data = Buffer.isBuffer(chunk) ? chunk : Buffer.from(chunk);
          if (hash) {
            hash.update(data);
          }
          if (needCrc64) {
            [hi, lo] = crc64Accumulate(hi, lo, data);
          }
        })
        .on("end", () => {
          resolve({
            md5: hash ? hash.digest() : undefined,
            crc64: needCrc64 ? crc64ToUint8Array(hi, lo) : undefined
          });
        })
        .on("error", reject);
    }
  );
}

// From tests/BlobTestServerFactory.ts (shares the physical line with the end
// of the function above).

/**
 * No-op stand-in returned in live mode. Tests call start/close/clean on the
 * "server", but in live mode there is no local server to manage - only an
 * object with a `config` whose host/port the test fixture can read.
 */
class LiveModeStubServer {
  public readonly config = { host: "live.azure", port: 443 };
  public async start(): Promise<void> {
    /* no-op */
  }
  public async close(): Promise<void> {
    /* no-op */
  }
  public async clean(): Promise<void> {
    /* no-op */
  }
}
} from "../../../src/common/utils/utils"; +import { + getCRC64FromString, + getMD5FromString +} from "../../../src/common/utils/utils"; +import * as crypto from "crypto"; import BlobTestServerFactory from "../../BlobTestServerFactory"; import { bodyToString, EMULATOR_ACCOUNT_KEY, EMULATOR_ACCOUNT_NAME, + getTestServerBaseURL, getUniqueName, sleep } from "../../testutils"; @@ -25,7 +30,7 @@ describe("AppendBlobAPIs", () => { const factory = new BlobTestServerFactory(); const server = factory.createServer(); - const baseURL = `http://${server.config.host}:${server.config.port}/devstoreaccount1`; + const baseURL = getTestServerBaseURL(server); const serviceClient = new BlobServiceClient( baseURL, newPipeline( @@ -451,6 +456,100 @@ describe("AppendBlobAPIs", () => { assert.deepStrictEqual(string, "abcdef123456T@"); }); + it("AppendBlock with correct crc64 should succeed and echo crc64 @loki", async () => { + await appendBlobClient.create(); + const body = "HelloWorld"; + const crc64 = getCRC64FromString(body); + + const result = await appendBlobClient.appendBlock(body, body.length, { + transactionalContentCrc64: new Uint8Array(crc64) + }); + + assert.equal(result._response.status, 201); + assert.ok( + result.xMsContentCrc64 !== undefined, + "Response should include x-ms-content-crc64" + ); + assert.deepStrictEqual( + Buffer.from(result.xMsContentCrc64!), + Buffer.from(crc64), + "Echoed CRC64 must match what was sent" + ); + }); + + it("AppendBlock with wrong crc64 should throw mismatch @loki", async () => { + await appendBlobClient.create(); + const body = "HelloWorld"; + const wrongCrc64 = getCRC64FromString("differentBody"); + + try { + await appendBlobClient.appendBlock(body, body.length, { + transactionalContentCrc64: new Uint8Array(wrongCrc64) + }); + } catch (e) { + assert.equal(e.name, "RestError"); + assert.equal(e.statusCode, 400); + assert.equal(e.code, "Crc64Mismatch"); + return; + } + assert.fail("Did not throw an exception."); + }); + + it("AppendBlock 
with wrong md5 should throw mismatch @loki", async () => { + await appendBlobClient.create(); + const body = "HelloWorld"; + const wrongMd5 = crypto.createHash("md5").update("differentBody", "utf8").digest(); + + try { + await appendBlobClient.appendBlock(body, body.length, { + transactionalContentMD5: new Uint8Array(wrongMd5) + }); + } catch (e) { + assert.equal(e.name, "RestError"); + assert.equal(e.statusCode, 400); + assert.equal(e.code, "Md5Mismatch"); + return; + } + assert.fail("Did not throw an exception."); + }); + + it("AppendBlock without any checksum header should still echo computed crc64 @loki", async () => { + // Per the Append Block REST contract, the service always computes a CRC64 + // of the appended block and returns it in x-ms-content-crc64, even when + // the client didn't supply one. The echoed value must match the canonical + // CRC-64/NVME. + await appendBlobClient.create(); + const body = "HelloWorld"; + const result = await appendBlobClient.appendBlock(body, body.length); + assert.equal(result._response.status, 201); + assert.deepStrictEqual( + Buffer.from(result.xMsContentCrc64!), + Buffer.from(getCRC64FromString(body)) + ); + }); + + it("AppendBlock with both md5 and crc64 supplied should be rejected @loki", async () => { + // Real Azure rejects requests that supply both Content-MD5 and + // x-ms-content-crc64 - Azurite must match. 
+ await appendBlobClient.create(); + const body = "HelloWorld"; + const md5 = crypto.createHash("md5").update(body, "utf8").digest(); + const crc64 = getCRC64FromString(body); + + try { + await appendBlobClient.appendBlock(body, body.length, { + transactionalContentMD5: new Uint8Array(md5), + transactionalContentCrc64: new Uint8Array(crc64) + }); + } catch (e) { + assert.equal(e.name, "RestError"); + assert.equal(e.statusCode, 400); + assert.equal(e.code, "BothCrc64AndMd5HeaderPresent"); + return; + } + assert.fail("Did not throw an exception."); + }); + it("AppendBlock with ifTags should work @loki", async () => { await appendBlobClient.create(); diff --git a/tests/blob/apis/blob.test.ts b/tests/blob/apis/blob.test.ts index 6c4358bda..d0831b694 100644 --- a/tests/blob/apis/blob.test.ts +++ b/tests/blob/apis/blob.test.ts @@ -4,9 +4,11 @@ import { newPipeline, BlobServiceClient, BlobItem, + BlobSASPermissions, Tags } from "@azure/storage-blob"; import * as assert from "assert"; +import * as crypto from "crypto"; import { BlobCopySourceTags, BlobHTTPHeaders } from "../../../src/blob/generated/artifacts/models"; import { configLogger } from "../../../src/common/Logger"; @@ -15,6 +17,7 @@ import { bodyToString, EMULATOR_ACCOUNT_KEY, EMULATOR_ACCOUNT_NAME, + getTestServerBaseURL, getUniqueName, sleep } from "../../testutils"; @@ -28,7 +31,7 @@ describe("BlobAPIs", () => { const factory = new BlobTestServerFactory(); const server = factory.createServer(); - const baseURL = `http://${server.config.host}:${server.config.port}/devstoreaccount1`; + const baseURL = getTestServerBaseURL(server); const serviceClient = new BlobServiceClient( baseURL, newPipeline( @@ -1661,6 +1664,57 @@ describe("BlobAPIs", () => { ); }); + it("Synchronized copy blob echoes source Content-MD5 in response when supplied @loki", async () => { + // Per the Copy Blob From URL REST contract, when the client supplies + // x-ms-source-content-md5 the service echoes it back as Content-MD5 on + // the 
response (so the client can correlate against the source's hash). + // Real Azure requires the source URL to carry auth - generate a read SAS + // (which the emulator also accepts). + const sourceBlob = getUniqueName("blob"); + const destBlob = getUniqueName("blob"); + + const sourceBlobClient = containerClient.getBlockBlobClient(sourceBlob); + const destBlobClient = containerClient.getBlockBlobClient(destBlob); + + const body = "hello"; + await sourceBlobClient.upload(body, body.length); + const sourceUrl = await sourceBlobClient.generateSasUrl({ + permissions: BlobSASPermissions.parse("r"), + expiresOn: new Date(Date.now() + 60 * 60 * 1000) + }); + + const md5 = crypto.createHash("md5").update(body, "utf8").digest(); + const result_copy = await destBlobClient.syncCopyFromURL(sourceUrl, { + sourceContentMD5: new Uint8Array(md5) + }); + + assert.equal(result_copy.copyStatus, "success"); + assert.deepStrictEqual( + Buffer.from(result_copy.contentMD5!), + Buffer.from(md5), + "Response Content-MD5 must echo the source-supplied value" + ); + }); + + it("Synchronized copy blob omits Content-MD5 in response when not supplied @loki", async () => { + // Without x-ms-source-content-md5, the response does not include Content-MD5. 
+ const sourceBlob = getUniqueName("blob"); + const destBlob = getUniqueName("blob"); + + const sourceBlobClient = containerClient.getBlockBlobClient(sourceBlob); + const destBlobClient = containerClient.getBlockBlobClient(destBlob); + + await sourceBlobClient.upload("hello", 5); + const sourceUrl = await sourceBlobClient.generateSasUrl({ + permissions: BlobSASPermissions.parse("r"), + expiresOn: new Date(Date.now() + 60 * 60 * 1000) + }); + + const result_copy = await destBlobClient.syncCopyFromURL(sourceUrl); + assert.equal(result_copy.copyStatus, "success"); + assert.strictEqual(result_copy.contentMD5, undefined); + }); + it("Synchronized copy blob should work to override metadata @loki", async () => { const sourceBlob = getUniqueName("blob"); const destBlob = getUniqueName("blob"); @@ -2528,6 +2582,8 @@ describe("BlobAPIs", () => { }); it("upload invalid x-ms-blob-content-md5 @loki @sql", async () => { + // Real Azure rejects a malformed x-ms-blob-content-md5 (not 16 bytes after + // base64 decode) with InvalidMd5. 
const pipeline = newPipeline( new StorageSharedKeyCredential( EMULATOR_ACCOUNT_NAME, @@ -2551,8 +2607,7 @@ describe("BlobAPIs", () => { assert.fail("Expected MD5 error"); } catch (err) { assert.deepStrictEqual((err as any).statusCode, 400); - assert.deepStrictEqual((err as any).code, 'InvalidOperation'); - assert.deepStrictEqual((err as any).details.errorCode, 'InvalidOperation'); + assert.deepStrictEqual((err as any).code, "InvalidMd5"); } }); diff --git a/tests/blob/apis/blobbatch.test.ts b/tests/blob/apis/blobbatch.test.ts index 594dcb5a6..ddadf40e5 100644 --- a/tests/blob/apis/blobbatch.test.ts +++ b/tests/blob/apis/blobbatch.test.ts @@ -12,7 +12,7 @@ import { import assert from "assert"; import { configLogger } from "../../../src/common/Logger"; import BlobTestServerFactory from "../../BlobTestServerFactory"; -import { EMULATOR_ACCOUNT_KEY, EMULATOR_ACCOUNT_NAME, getUniqueName } from "../../testutils"; +import { EMULATOR_ACCOUNT_KEY, EMULATOR_ACCOUNT_NAME, getTestServerBaseURL, getUniqueName } from "../../testutils"; // Set true to enable debug log configLogger(false); @@ -21,7 +21,7 @@ describe("Blob batch API", () => { const factory = new BlobTestServerFactory(); const server = factory.createServer(); - const baseURL = `http://${server.config.host}:${server.config.port}/devstoreaccount1`; + const baseURL = getTestServerBaseURL(server); const serviceClient = new BlobServiceClient( baseURL, newPipeline( diff --git a/tests/blob/apis/blockblob.test.ts b/tests/blob/apis/blockblob.test.ts index 0f03201cd..5439168cf 100644 --- a/tests/blob/apis/blockblob.test.ts +++ b/tests/blob/apis/blockblob.test.ts @@ -5,6 +5,7 @@ import { BlobSASPermissions, Tags } from "@azure/storage-blob"; +import CustomHeaderPolicyFactory from "../RequestPolicy/CustomHeaderPolicyFactory"; import * as assert from "assert"; import * as crypto from "crypto"; @@ -15,10 +16,14 @@ import { bodyToString, EMULATOR_ACCOUNT_KEY, EMULATOR_ACCOUNT_NAME, + getTestServerBaseURL, getUniqueName, sleep } 
from "../../testutils"; -import { getMD5FromString } from "../../../src/common/utils/utils"; +import { + getCRC64FromString, + getMD5FromString +} from "../../../src/common/utils/utils"; // Set true to enable debug log configLogger(false); @@ -27,7 +32,7 @@ describe("BlockBlobAPIs", () => { const factory = new BlobTestServerFactory(); const server = factory.createServer(); - const baseURL = `http://${server.config.host}:${server.config.port}/devstoreaccount1`; + const baseURL = getTestServerBaseURL(server); const serviceClient = new BlobServiceClient( baseURL, newPipeline( @@ -71,6 +76,35 @@ describe("BlockBlobAPIs", () => { await containerClient.delete(); }); + // Temporary helper: The SDK's TypeScript wrapper does not expose some + // checksum headers on BlockBlobUploadOptions yet, so tests inject raw HTTP + // headers through a custom pipeline policy. Remove this once the TypeScript + // wrapper surfaces these headers on the public options type. + function getBlockBlobClientWithRawHeaders( + container: string, + blob: string, + headers: Array<{ key: string; value: string }> + ) { + const pipeline = newPipeline( + new StorageSharedKeyCredential( + EMULATOR_ACCOUNT_NAME, + EMULATOR_ACCOUNT_KEY + ), + { + retryOptions: { maxTries: 1 }, + keepAliveOptions: { enable: false } + } + ); + for (const header of headers) { + pipeline.factories.unshift( + new CustomHeaderPolicyFactory(header.key, header.value) + ); + } + + const customClient = new BlobServiceClient(baseURL, pipeline); + return customClient.getContainerClient(container).getBlockBlobClient(blob); + } + it("Block blob upload should refresh lease state @loki @sql", async () => { await blockBlobClient.upload('a', 1); @@ -140,6 +174,151 @@ describe("BlockBlobAPIs", () => { assert.deepStrictEqual(await bodyToString(result, 0), ""); }); + it("upload (PutBlob) with correct crc64 should succeed @loki @sql", async () => { + const body = "HelloWorld"; + const crc64 = getCRC64FromString(body); + const clientWithCrc64 = 
getBlockBlobClientWithRawHeaders(containerName, blobName, [ + { + key: "x-ms-content-crc64", + value: Buffer.from(crc64).toString("base64") + } + ]); + const result = await clientWithCrc64.upload(body, body.length); + assert.equal(result._response.status, 201); + + const downloaded = await blobClient.download(0); + assert.deepStrictEqual(await bodyToString(downloaded, body.length), body); + }); + + it("upload (PutBlob) with wrong crc64 should throw mismatch @loki @sql", async () => { + const body = "HelloWorld"; + const wrongCrc64 = getCRC64FromString("differentBody"); + const clientWithWrongCrc64 = getBlockBlobClientWithRawHeaders(containerName, blobName, [ + { + key: "x-ms-content-crc64", + value: Buffer.from(wrongCrc64).toString("base64") + } + ]); + try { + await clientWithWrongCrc64.upload(body, body.length); + } catch (e) { + assert.equal(e.statusCode, 400); + assert.equal(e.code, "Crc64Mismatch"); + return; + } + assert.fail("Did not throw an exception."); + }); + + it("upload (PutBlob) with wrong md5 should throw mismatch @loki @sql", async () => { + const body = "HelloWorld"; + const wrongMd5 = crypto.createHash("md5").update("differentBody", "utf8").digest(); + const clientWithWrongMd5 = getBlockBlobClientWithRawHeaders(containerName, blobName, [ + { + key: "content-md5", + value: Buffer.from(wrongMd5).toString("base64") + } + ]); + try { + await clientWithWrongMd5.upload(body, body.length); + } catch (e) { + assert.equal(e.statusCode, 400); + assert.equal(e.code, "Md5Mismatch"); + return; + } + assert.fail("Did not throw an exception."); + }); + + it("upload (PutBlob) x-ms-blob-content-md5 takes precedence over Content-MD5 @loki @sql", async () => { + // Per the Put Blob REST contract, x-ms-blob-content-md5 takes precedence + // over Content-MD5 for transit integrity verification on BlockBlob. 
+ // - Content-MD5 wrong + x-ms-blob-content-md5 correct -> success + // - Content-MD5 correct + x-ms-blob-content-md5 wrong -> Md5Mismatch + const body = "HelloWorld"; + const correctMd5 = crypto.createHash("md5").update(body, "utf8").digest(); + const wrongMd5 = crypto.createHash("md5").update("differentBody", "utf8").digest(); + + const clientWithWrongContentAndCorrectBlobMd5 = + getBlockBlobClientWithRawHeaders(containerName, blobName, [ + { + key: "content-md5", + value: Buffer.from(wrongMd5).toString("base64") + }, + { + key: "x-ms-blob-content-md5", + value: Buffer.from(correctMd5).toString("base64") + } + ]); + + // Wrong transactional + correct blob-content-md5 -> success. + await clientWithWrongContentAndCorrectBlobMd5.upload(body, body.length); + + const clientWithCorrectContentAndWrongBlobMd5 = + getBlockBlobClientWithRawHeaders(containerName, blobName, [ + { + key: "content-md5", + value: Buffer.from(correctMd5).toString("base64") + }, + { + key: "x-ms-blob-content-md5", + value: Buffer.from(wrongMd5).toString("base64") + } + ]); + + // Correct transactional + wrong blob-content-md5 -> Md5Mismatch. + try { + await clientWithCorrectContentAndWrongBlobMd5.upload(body, body.length); + assert.fail("Expected Md5Mismatch when x-ms-blob-content-md5 is wrong."); + } catch (e) { + assert.equal(e.statusCode, 400); + assert.equal(e.code, "Md5Mismatch"); + } + }); + + it("upload (PutBlob) with wrong-length x-ms-blob-content-md5 should be rejected @loki @sql", async () => { + // x-ms-blob-content-md5 must decode to exactly 16 bytes. Verified live: + // real Azure rejects wrong-length values with InvalidMd5 (not + // InvalidHeaderValue, despite x-ms-blob-content-md5 being a property + // header). Azurite routes all MD5 sources through the same validator. 
+ const body = "HelloWorld"; + const wrongLength = new Uint8Array([0, 0, 0, 0]); + try { + await blockBlobClient.upload(body, body.length, { + blobHTTPHeaders: { blobContentMD5: wrongLength } + }); + } catch (e: any) { + assert.equal(e.statusCode, 400); + assert.equal(e.code, "InvalidMd5"); + return; + } + assert.fail("Did not throw an exception."); + }); + + it("upload (PutBlob) with both md5 and crc64 supplied should be rejected @loki @sql", async () => { + // Real Azure rejects requests that supply both Content-MD5 and + // x-ms-content-crc64 - Azurite must match. + const body = "HelloWorld"; + const md5 = crypto.createHash("md5").update(body, "utf8").digest(); + const crc64 = getCRC64FromString(body); + const clientWithCrc64AndMd5 = getBlockBlobClientWithRawHeaders(containerName, blobName, [ + { + key: "x-ms-content-crc64", + value: Buffer.from(crc64).toString("base64") + }, + { + key: "content-md5", + value: Buffer.from(md5).toString("base64") + } + ]); + try { + await clientWithCrc64AndMd5.upload(body, body.length); + } catch (e) { + assert.equal(e.statusCode, 400); + assert.equal(e.code, "BothCrc64AndMd5HeaderPresent"); + return; + } + assert.fail("Did not throw an exception."); + }); + it("upload with string body and all parameters set @loki @sql", async () => { const body: string = getUniqueName("randomstring"); const options = { @@ -274,8 +453,10 @@ describe("BlockBlobAPIs", () => { it("stageBlock with wrong body should throw md5 mismatch @loki @sql", async () => { const body = "HelloWorld"; - const md5 = new Uint8Array(Buffer.from("anotherBody")); - const options = { transactionalContentMD5: md5 }; + // A valid 16-byte MD5 of a *different* body, to exercise the mismatch + // path rather than the InvalidMd5 (wrong-length) path. 
+ const md5 = crypto.createHash("md5").update("anotherBody", "utf8").digest(); + const options = { transactionalContentMD5: new Uint8Array(md5) }; try { await blockBlobClient.stageBlock( @@ -287,10 +468,31 @@ describe("BlockBlobAPIs", () => { } catch (e) { assert.equal(e.name, "RestError"); assert.equal(e.statusCode, 400); - assert.equal( - e.details.message.indexOf("Provided contentMD5 doesn't match."), - 0 + assert.equal(e.code, "Md5Mismatch"); + return; + } + assert.fail("Did not throw an exception."); + }); + + it("stageBlock with wrong-length MD5 should be rejected @loki @sql", async () => { + // Content-MD5 must decode to exactly 16 bytes. This test pins which error + // code the service returns for a malformed (4-byte) MD5 header so Azurite + // can be verified against real Azure. + const body = "HelloWorld"; + const wrongLengthMd5 = new Uint8Array([0, 0, 0, 0]); + const options = { transactionalContentMD5: wrongLengthMd5 }; + + try { + await blockBlobClient.stageBlock( + base64encode("1"), + body, + body.length, + options ); + } catch (e) { + assert.equal(e.name, "RestError"); + assert.equal(e.statusCode, 400); + assert.equal(e.code, "InvalidMd5"); return; } assert.fail("Did not throw an exception."); @@ -316,6 +518,146 @@ describe("BlockBlobAPIs", () => { assert.equal(listResponse.uncommittedBlocks![0].size, body.length); }); + it("stageBlock with correct crc64 should succeed @loki @sql", async () => { + const body = "HelloWorld"; + const crc64 = getCRC64FromString(body); + const options = { transactionalContentCrc64: new Uint8Array(crc64) }; + + const result = await blockBlobClient.stageBlock( + base64encode("1"), + body, + body.length, + options + ); + + assert.equal(result._response.status, 201); + // Server must echo back the CRC64 it validated against + assert.ok( + result.xMsContentCrc64 !== undefined, + "Response should include x-ms-content-crc64" + ); + assert.deepStrictEqual( + Buffer.from(result.xMsContentCrc64!), + Buffer.from(crc64), + "Echoed 
CRC64 must match what was sent" + ); + + const listResponse = await blockBlobClient.getBlockList("uncommitted"); + assert.equal(listResponse.uncommittedBlocks!.length, 1); + assert.equal(listResponse.uncommittedBlocks![0].name, base64encode("1")); + assert.equal(listResponse.uncommittedBlocks![0].size, body.length); + }); + + it("stageBlock with wrong-length CRC64 should be rejected @loki @sql", async () => { + // x-ms-content-crc64 must decode to at least 8 bytes (CRC-64 is 64-bit). + // Real Azure rejects shorter values with InvalidHeaderValue; this test + // pins that contract for Azurite. + const body = "HelloWorld"; + const wrongLengthCrc64 = new Uint8Array([0, 0, 0, 0]); + const options = { transactionalContentCrc64: wrongLengthCrc64 }; + + try { + await blockBlobClient.stageBlock( + base64encode("1"), + body, + body.length, + options + ); + } catch (e) { + assert.equal(e.name, "RestError"); + assert.equal(e.statusCode, 400); + assert.equal(e.code, "InvalidHeaderValue"); + return; + } + assert.fail("Did not throw an exception."); + }); + + it("stageBlock with wrong body should throw crc64 mismatch @loki @sql", async () => { + const body = "HelloWorld"; + // Provide CRC64 of a different payload - server must reject the upload + const wrongCrc64 = getCRC64FromString("differentBody"); + const options = { transactionalContentCrc64: new Uint8Array(wrongCrc64) }; + + try { + await blockBlobClient.stageBlock( + base64encode("1"), + body, + body.length, + options + ); + } catch (e) { + assert.equal(e.name, "RestError"); + assert.equal(e.statusCode, 400); + assert.equal(e.code, "Crc64Mismatch"); + return; + } + assert.fail("Did not throw an exception."); + }); + + it("stageBlock with both md5 and crc64 supplied should be rejected @loki @sql", async () => { + // Real Azure rejects requests that supply both Content-MD5 and + // x-ms-content-crc64 - Azurite must match. 
+ const body = "HelloWorld"; + const md5 = crypto.createHash("md5").update(body, "utf8").digest(); + const crc64 = getCRC64FromString(body); + const options = { + transactionalContentMD5: new Uint8Array(md5), + transactionalContentCrc64: new Uint8Array(crc64) + }; + + try { + await blockBlobClient.stageBlock( + base64encode("1"), + body, + body.length, + options + ); + } catch (e) { + assert.equal(e.name, "RestError"); + assert.equal(e.statusCode, 400); + assert.equal(e.code, "BothCrc64AndMd5HeaderPresent"); + return; + } + assert.fail("Did not throw an exception."); + }); + + it("stageBlock ignores x-ms-blob-content-md5 (not a Put Block REST header) @loki @sql", async () => { + // Per the Put Block REST contract, x-ms-blob-content-md5 is NOT a Put Block + // header. Real Azure silently ignores it (even when malformed). Azurite + // must match: a bogus x-ms-blob-content-md5 must not cause validation or + // an error. + const pipeline = newPipeline( + new StorageSharedKeyCredential(EMULATOR_ACCOUNT_NAME, EMULATOR_ACCOUNT_KEY), + { retryOptions: { maxTries: 1 }, keepAliveOptions: { enable: false } } + ); + pipeline.factories.unshift( + new CustomHeaderPolicyFactory("x-ms-blob-content-md5", "AAAAAAAAAAA=") + ); + const altClient = new BlobServiceClient(baseURL, pipeline) + .getContainerClient(containerName) + .getBlockBlobClient(blobName); + + const result = await altClient.stageBlock(base64encode("1"), "HelloWorld", 10); + assert.equal(result._response.status, 201); + }); + + it("stageBlock without any checksum header should still echo computed crc64 @loki @sql", async () => { + // Per the Put Block REST contract, the service computes a CRC64 of the + // block and returns it in x-ms-content-crc64 even when the client didn't + // supply one. The echoed value must match the canonical CRC-64/NVME. 
+ const body = "HelloWorld"; + const result = await blockBlobClient.stageBlock( + base64encode("1"), + body, + body.length + ); + assert.equal(result._response.status, 201); + assert.deepStrictEqual( + Buffer.from(result.xMsContentCrc64!), + Buffer.from(getCRC64FromString(body)) + ); + }); + it("commitBlockList @loki @sql", async () => { const body = "HelloWorld"; await blockBlobClient.stageBlock(base64encode("1"), body, body.length); diff --git a/tests/blob/apis/container.test.ts b/tests/blob/apis/container.test.ts index 044fcad30..933e7db2c 100644 --- a/tests/blob/apis/container.test.ts +++ b/tests/blob/apis/container.test.ts @@ -18,6 +18,7 @@ import { base64encode, EMULATOR_ACCOUNT_KEY, EMULATOR_ACCOUNT_NAME, + getTestServerBaseURL, getUniqueName, sleep } from "../../testutils"; @@ -30,7 +31,7 @@ describe("ContainerAPIs", () => { const factory = new BlobTestServerFactory(); const server = factory.createServer(); - const baseURL = `http://${server.config.host}:${server.config.port}/devstoreaccount1`; + const baseURL = getTestServerBaseURL(server); const serviceClient = new BlobServiceClient( baseURL, newPipeline( diff --git a/tests/blob/apis/pageblob.test.ts b/tests/blob/apis/pageblob.test.ts index 66781d21b..f1ebf0d4f 100644 --- a/tests/blob/apis/pageblob.test.ts +++ b/tests/blob/apis/pageblob.test.ts @@ -13,9 +13,11 @@ import { bodyToString, EMULATOR_ACCOUNT_KEY, EMULATOR_ACCOUNT_NAME, + getTestServerBaseURL, getUniqueName } from "../../testutils"; -import { getMD5FromString } from "../../../src/common/utils/utils"; +import { getCRC64FromString, getMD5FromString } from "../../../src/common/utils/utils"; +import * as crypto from "crypto"; // Set true to enable debug log configLogger(false); @@ -24,7 +26,7 @@ describe("PageBlobAPIs", () => { const factory = new BlobTestServerFactory(); const server = factory.createServer(); - const baseURL = `http://${server.config.host}:${server.config.port}/devstoreaccount1`; + const baseURL = getTestServerBaseURL(server); 
const serviceClient = new BlobServiceClient( baseURL, newPipeline( @@ -532,6 +534,87 @@ describe("PageBlobAPIs", () => { assert.fail(); }); + it("uploadPages with correct crc64 should succeed and echo crc64 @loki", async () => { + const length = 512; + await pageBlobClient.create(length); + const body = "a".repeat(length); + const crc64 = getCRC64FromString(body); + + const result = await pageBlobClient.uploadPages(body, 0, length, { + transactionalContentCrc64: new Uint8Array(crc64) + }); + assert.equal(result._response.status, 201); + assert.deepStrictEqual( + Buffer.from(result.xMsContentCrc64!), + Buffer.from(crc64) + ); + }); + + it("uploadPages with wrong crc64 should throw mismatch @loki", async () => { + const length = 512; + await pageBlobClient.create(length); + const body = "a".repeat(length); + const wrongCrc64 = getCRC64FromString("b".repeat(length)); + try { + await pageBlobClient.uploadPages(body, 0, length, { + transactionalContentCrc64: new Uint8Array(wrongCrc64) + }); + } catch (e) { + assert.equal(e.statusCode, 400); + assert.equal(e.code, "Crc64Mismatch"); + return; + } + assert.fail("Did not throw an exception."); + }); + + it("uploadPages with wrong md5 should throw mismatch @loki", async () => { + const length = 512; + await pageBlobClient.create(length); + const body = "a".repeat(length); + const wrongMd5 = crypto.createHash("md5").update("differentBody", "utf8").digest(); + try { + await pageBlobClient.uploadPages(body, 0, length, { + transactionalContentMD5: new Uint8Array(wrongMd5) + }); + } catch (e) { + assert.equal(e.statusCode, 400); + assert.equal(e.code, "Md5Mismatch"); + return; + } + assert.fail("Did not throw an exception."); + }); + + it("uploadPages with both md5 and crc64 supplied should be rejected @loki", async () => { + const length = 512; + await pageBlobClient.create(length); + const body = "a".repeat(length); + const md5 = crypto.createHash("md5").update(body, "utf8").digest(); + const crc64 = getCRC64FromString(body); + 
try { + await pageBlobClient.uploadPages(body, 0, length, { + transactionalContentMD5: new Uint8Array(md5), + transactionalContentCrc64: new Uint8Array(crc64) + }); + } catch (e) { + assert.equal(e.statusCode, 400); + assert.equal(e.code, "BothCrc64AndMd5HeaderPresent"); + return; + } + assert.fail("Did not throw an exception."); + }); + + it("uploadPages without any checksum header should still echo computed crc64 @loki", async () => { + const length = 512; + await pageBlobClient.create(length); + const body = "a".repeat(length); + const result = await pageBlobClient.uploadPages(body, 0, length); + assert.equal(result._response.status, 201); + assert.deepStrictEqual( + Buffer.from(result.xMsContentCrc64!), + Buffer.from(getCRC64FromString(body)) + ); + }); + it("uploadPages with sequential pages @loki", async () => { const length = 512 * 3; await pageBlobClient.create(length); diff --git a/tests/blob/blockblob.highlevel.test.ts b/tests/blob/blockblob.highlevel.test.ts index 9bdb29133..c439dc369 100644 --- a/tests/blob/blockblob.highlevel.test.ts +++ b/tests/blob/blockblob.highlevel.test.ts @@ -15,6 +15,7 @@ import { createRandomLocalFile, EMULATOR_ACCOUNT_KEY, EMULATOR_ACCOUNT_NAME, + getTestServerBaseURL, getUniqueName, readStreamToLocalFile, rmRecursive @@ -29,7 +30,7 @@ describe("BlockBlobHighlevel", () => { // Loose model to bypass if-match header used by download retry const server = factory.createServer(true); - const baseURL = `http://${server.config.host}:${server.config.port}/devstoreaccount1`; + const baseURL = getTestServerBaseURL(server); const serviceClient = new BlobServiceClient( baseURL, newPipeline( diff --git a/tests/blob/handlers/AppendBlobHandler.test.ts b/tests/blob/handlers/AppendBlobHandler.test.ts index 739d2a988..a8ef5a57f 100644 --- a/tests/blob/handlers/AppendBlobHandler.test.ts +++ b/tests/blob/handlers/AppendBlobHandler.test.ts @@ -94,6 +94,13 @@ describe("AppendBlobHandler", () => { // so we accept any PassThrough stream here. 
extentStore.appendExtent(anyOfClass(PassThrough), blobCtx.contextId) ).thenResolve(extent); + // appendBlock always re-reads the persisted extent to compute checksums, + // so provide a fresh readable stream for each invocation. + when(extentStore.readExtent(extent, blobCtx.contextId)).thenCall(() => { + const readStream = new PassThrough(); + readStream.end(buffer); + return Promise.resolve(readStream); + }); describe("create", () => { it("accepts requests withContent-Length == 0 @loki", async () => { @@ -195,7 +202,7 @@ describe("AppendBlobHandler", () => { }); }); - it("rejects requests with invalid MD5 checksum @loki", async () => { + it("rejects requests with malformed MD5 checksum @loki", async () => { when(request.getHeader(HeaderConstants.CONTENT_MD5)).thenReturn( "d3JvbmdfTUQ1X2NoZWNrc3VtCg==" ); @@ -215,7 +222,7 @@ describe("AppendBlobHandler", () => { }, { name: "StorageError", - storageErrorCode: "Md5Mismatch" + storageErrorCode: "InvalidMd5" } ); }); diff --git a/tests/blob/utils.test.ts b/tests/blob/utils.test.ts index 9ecd72232..58588acb8 100644 --- a/tests/blob/utils.test.ts +++ b/tests/blob/utils.test.ts @@ -1,5 +1,12 @@ import * as assert from "assert"; -import { convertRawHeadersToMetadata } from "../../src/common/utils/utils"; +import { PassThrough } from "stream"; +import { computeAndValidateTransactionalChecksums } from "../../src/blob/utils/utils"; +import { + convertRawHeadersToMetadata, + getCRC64FromStream, + getCRC64FromString, + getMD5FromString +} from "../../src/common/utils/utils"; describe("Utils", () => { it("convertRawHeadersToMetadata should work", () => { @@ -57,3 +64,176 @@ describe("Utils", () => { assert.deepStrictEqual(metadata, undefined); }); }); + +describe("CRC64", () => { + // CRC-64/NVME check value for "123456789" per the CRC catalogue: + // https://reveng.sourceforge.io/crc-catalogue/all.htm - the numeric value is + // 0xae8b14860a799888, serialised on the wire as 8 little-endian bytes. 
+ it("getCRC64FromString matches the standard CRC-64/NVME check value for '123456789'", () => { + const result = getCRC64FromString("123456789"); + const hex = Buffer.from(result).toString("hex"); + assert.strictEqual(hex, "8898790a86148bae"); + }); + + it("getCRC64FromString produces an 8-byte result", () => { + assert.strictEqual(getCRC64FromString("").length, 8); + assert.strictEqual(getCRC64FromString("Hello, World!").length, 8); + }); + + it("getCRC64FromStream matches getCRC64FromString for the same data", async () => { + const data = "The quick brown fox jumps over the lazy dog"; + const fromString = getCRC64FromString(data); + + const stream = new PassThrough(); + stream.end(Buffer.from(data)); + const fromStream = await getCRC64FromStream(stream); + + assert.deepStrictEqual(Buffer.from(fromString), Buffer.from(fromStream)); + }); + + it("getCRC64FromStream produces identical results regardless of chunk boundaries", async () => { + // Streaming data split across different chunk sizes must produce the same + // CRC as a single contiguous buffer - chunk boundaries must not affect the result. + const data = Buffer.from("Azure Blob Storage block integrity check"); + const expected = getCRC64FromString(data.toString()); + + // Push as many 3-byte chunks (deliberately misaligned with any word boundary) + const chunked = new PassThrough(); + for (let i = 0; i < data.length; i += 3) { + chunked.push(data.slice(i, i + 3)); + } + chunked.push(null); + const fromChunked = await getCRC64FromStream(chunked); + + assert.deepStrictEqual(Buffer.from(fromChunked), Buffer.from(expected)); + }); + + it("getCRC64FromStream produces distinct values for payloads that differ by a single byte", async () => { + // Verifies the avalanche property: a one-byte change must alter the checksum. 
+ const base = Buffer.from("block content for crc64 test"); + const mutated = Buffer.from([...base]); + mutated[mutated.length - 1] ^= 0x01; + + const baseStream = new PassThrough(); + baseStream.end(base); + const mutatedStream = new PassThrough(); + mutatedStream.end(mutated); + + const crc1 = await getCRC64FromStream(baseStream); + const crc2 = await getCRC64FromStream(mutatedStream); + + assert.notDeepStrictEqual(Buffer.from(crc1), Buffer.from(crc2)); + }); +}); + +describe("Transactional Checksum Representation", () => { + function makeBodyStream(body: string): PassThrough { + const stream = new PassThrough(); + stream.end(Buffer.from(body)); + return stream; + } + + it("accepts non-canonical base64 MD5 that decodes to the same 16 bytes", async () => { + const body = "representation-md5-test"; + const md5 = await getMD5FromString(body); + const canonical = Buffer.from(md5).toString("base64"); + const nonCanonical = canonical.replace(/=+$/, ""); + + await assert.doesNotReject(async () => { + await computeAndValidateTransactionalChecksums( + makeBodyStream(body), + { md5: nonCanonical }, + "test-md5-noncanonical" + ); + }); + }); + + it("accepts non-canonical base64 CRC64 that decodes to the same 8 bytes", async () => { + const body = "representation-crc64-test"; + const crc64 = getCRC64FromString(body); + const canonical = Buffer.from(crc64).toString("base64"); + const nonCanonical = canonical.replace(/=+$/, ""); + + await assert.doesNotReject(async () => { + await computeAndValidateTransactionalChecksums( + makeBodyStream(body), + { crc64: nonCanonical }, + "test-crc64-noncanonical" + ); + }); + }); + + it("rejects wrong-length MD5 that decodes to fewer than 16 bytes", async () => { + const body = "representation-md5-invalid-test"; + const malformedMd5 = Buffer.from([1, 2, 3, 4]).toString("base64").replace(/=+$/, ""); + + await assert.rejects( + async () => { + await computeAndValidateTransactionalChecksums( + makeBodyStream(body), + { md5: malformedMd5 }, + 
"test-md5-invalid" + ); + }, + { + name: "StorageError", + storageErrorCode: "InvalidMd5" + } + ); + }); + + it("rejects wrong-length CRC64 that decodes to fewer than 8 bytes", async () => { + const body = "representation-crc64-invalid-test"; + const malformedCrc64 = Buffer.from([1, 2, 3, 4]).toString("base64").replace(/=+$/, ""); + + await assert.rejects( + async () => { + await computeAndValidateTransactionalChecksums( + makeBodyStream(body), + { crc64: malformedCrc64 }, + "test-crc64-invalid" + ); + }, + { + name: "StorageError", + storageErrorCode: "InvalidHeaderValue" + } + ); + }); + + it("rejects invalid-base64 MD5 with non-base64 characters", async () => { + const body = "representation-md5-invalid-base64"; + + await assert.rejects( + async () => { + await computeAndValidateTransactionalChecksums( + makeBodyStream(body), + { md5: "invalid_@md5" }, + "test-md5-invalid-base64" + ); + }, + { + name: "StorageError", + storageErrorCode: "InvalidMd5" + } + ); + }); + + it("rejects invalid-base64 CRC64 with non-base64 characters", async () => { + const body = "representation-crc64-invalid-base64"; + + await assert.rejects( + async () => { + await computeAndValidateTransactionalChecksums( + makeBodyStream(body), + { crc64: "invalid_@crc64" }, + "test-crc64-invalid-base64" + ); + }, + { + name: "StorageError", + storageErrorCode: "InvalidHeaderValue" + } + ); + }); +}); diff --git a/tests/testutils.ts b/tests/testutils.ts index 86c214622..a0c6cbf77 100644 --- a/tests/testutils.ts +++ b/tests/testutils.ts @@ -5,14 +5,94 @@ import { sign } from "jsonwebtoken"; import { join } from "path"; import rimraf from "rimraf"; import { URL } from "url"; +import { + EMULATOR_ACCOUNT_KEY_STR as DEFAULT_EMULATOR_ACCOUNT_KEY_STR, + EMULATOR_ACCOUNT_NAME as DEFAULT_EMULATOR_ACCOUNT_NAME +} from "../src/blob/utils/constants"; -export const EMULATOR_ACCOUNT_NAME = "devstoreaccount1"; +// ---- Live Azure mode ------------------------------------------------------- +// +// Set 
AZURITE_LIVE_TEST_CONNECTION_STRING to a full storage account connection +// string to route tests at a real Azure account instead of a local Azurite +// server. When set: +// - BlobTestServerFactory.createServer() returns a no-op stub. +// - EMULATOR_ACCOUNT_NAME / EMULATOR_ACCOUNT_KEY resolve to the live account. +// - getTestServerBaseURL(server) returns the live blob endpoint. +// +// Per-test files build their service-client base URL via `getTestServerBaseURL` +// (rather than the inline `http://host:port/devstoreaccount1` template), +// which routes correctly in both modes. + +function parseLiveConnectionString(cs: string): { + accountName: string; + accountKey: string; + blobEndpoint: string; +} { + const parts = new Map(); + for (const segment of cs.split(";")) { + const eq = segment.indexOf("="); + if (eq > 0) parts.set(segment.slice(0, eq).trim(), segment.slice(eq + 1).trim()); + } + const accountName = parts.get("AccountName"); + const accountKey = parts.get("AccountKey"); + const protocol = parts.get("DefaultEndpointsProtocol") || "https"; + const suffix = parts.get("EndpointSuffix") || "core.windows.net"; + if (!accountName || !accountKey) { + throw new Error( + "AZURITE_LIVE_TEST_CONNECTION_STRING is missing AccountName or AccountKey." + ); + } + const blobEndpoint = (parts.get("BlobEndpoint") || + `${protocol}://${accountName}.blob.${suffix}`).replace(/\/$/, ""); + return { accountName, accountKey, blobEndpoint }; +} + +const liveConnectionString = process.env.AZURITE_LIVE_TEST_CONNECTION_STRING || undefined; + +export const LIVE_TEST_MODE = liveConnectionString !== undefined; + +const liveConfig = liveConnectionString + ? parseLiveConnectionString(liveConnectionString) + : undefined; + +export const EMULATOR_ACCOUNT_NAME = + liveConfig?.accountName ?? DEFAULT_EMULATOR_ACCOUNT_NAME; export const EMULATOR_ACCOUNT_KEY = - "Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw=="; + liveConfig?.accountKey ?? 
+ DEFAULT_EMULATOR_ACCOUNT_KEY_STR; + +/** + * Builds the blob service base URL for a test fixture. In emulator mode this + * is `http://<host>:<port>/devstoreaccount1`; in live mode it's the real + * account's blob endpoint (e.g. `https://<account>.blob.core.windows.net`). + * + * Pass `https: true` for the few tests that explicitly need HTTPS against the + * emulator (oauth/https tests); ignored in live mode, where the protocol is + * dictated by the connection string (`DefaultEndpointsProtocol` / `BlobEndpoint`) + * and is HTTPS for typical Azure accounts. + */ +export function getTestServerBaseURL( + server: { config: { host: string; port: number } }, + options: { https?: boolean; accountPathSuffix?: string } = {} +): string { + if (liveConfig) { + return options.accountPathSuffix + ? `${liveConfig.blobEndpoint}${options.accountPathSuffix}` + : liveConfig.blobEndpoint; + } + const protocol = options.https ? "https" : "http"; + const suffix = options.accountPathSuffix ?? "/devstoreaccount1"; + return `${protocol}://${server.config.host}:${server.config.port}${suffix}`; +} + +// Counter-based suffix instead of Math.random() to guarantee uniqueness within +// a test run. Random suffixes can collide when multiple entities are created +// within the same millisecond on fast CI runners, causing flaky batch tests. 
+let _uniqueNameCounter = 0; export function getUniqueName(prefix: string): string { return `${prefix}${new Date().getTime()}${padStart( - Math.floor(Math.random() * 10000).toString(), + (++_uniqueNameCounter).toString(), 5, "00000" )}`; @@ -145,7 +225,7 @@ export async function createRandomLocalFile( ws.on("open", () => { // tslint:disable-next-line:no-empty - while (offsetInMB++ < blockNumber && ws.write(randomValueHex())) {} + while (offsetInMB++ < blockNumber && ws.write(randomValueHex())) { } if (offsetInMB >= blockNumber) { ws.end(); } @@ -153,7 +233,7 @@ export async function createRandomLocalFile( ws.on("drain", () => { // tslint:disable-next-line:no-empty - while (offsetInMB++ < blockNumber && ws.write(randomValueHex())) {} + while (offsetInMB++ < blockNumber && ws.write(randomValueHex())) { } if (offsetInMB >= blockNumber) { ws.end(); }