From 23d4b8682de3f2e44079ec21e7ca10cce00351ea Mon Sep 17 00:00:00 2001 From: alexanderkirtzel Date: Mon, 8 Jun 2026 17:42:17 +0200 Subject: [PATCH 1/2] validate --- .changeset/scoped-generic-data-attribute.md | 10 +- .changeset/transformer-validate.md | 17 + package-lock.json | 40 ++ packages/cli/examples/flow-complete.json | 11 - packages/cli/package.json | 1 + .../src/__tests__/unit/validate/flow.test.ts | 172 ++++- packages/cli/src/commands/validate/index.ts | 8 +- .../src/commands/validate/validators/flow.ts | 116 ++- packages/core/src/__tests__/contract.test.ts | 37 + packages/core/src/__tests__/flow.test.ts | 41 ++ .../schemas/flow-validate-retired.test.ts | 19 + .../schemas/validate-flow-config.test.ts | 48 +- .../core/src/__tests__/step-entry.test.ts | 13 + packages/core/src/contract.ts | 22 +- packages/core/src/flow.ts | 3 - .../__tests__/validate-json-schema.test.ts | 27 +- packages/core/src/schemas/flow.ts | 30 - packages/core/src/schemas/index.ts | 2 - packages/core/src/schemas/intellisense.ts | 13 + .../core/src/schemas/validate-flow-config.ts | 86 ++- packages/core/src/step-entry.ts | 1 - packages/core/src/types/destination.ts | 1 - packages/core/src/types/flow.ts | 5 +- packages/core/src/types/index.ts | 2 +- packages/core/src/types/source.ts | 1 - packages/core/src/types/transformer.ts | 1 - packages/core/src/types/validate.ts | 15 - .../src/__tests__/tools/generate.test.ts | 10 + .../mcps/source-browser/src/tools/generate.ts | 17 +- .../transformers/validate/jest.config.mjs | 5 + packages/transformers/validate/package.json | 67 ++ .../scripts/generate-format-schema.mjs | 41 ++ .../src/__tests__/bundle-isolation.test.ts | 185 +++++ .../src/__tests__/format-schema-sync.test.ts | 26 + .../validate/src/__tests__/scaffold.test.ts | 7 + .../src/__tests__/settings.schema.test.ts | 38 + .../src/__tests__/stepExamples.test.ts | 84 +++ .../src/__tests__/transformer.test.ts | 186 +++++ .../validate/src/__tests__/validate.test.ts | 247 +++++++ 
packages/transformers/validate/src/dev.ts | 4 + .../validate/src/event-format.schema.ts | 664 ++++++++++++++++++ .../validate/src/examples/index.ts | 1 + .../validate/src/examples/step.ts | 117 +++ packages/transformers/validate/src/hints.ts | 115 +++ packages/transformers/validate/src/index.ts | 10 + .../validate/src/schemas/index.ts | 5 + .../validate/src/schemas/settings.ts | 47 ++ .../transformers/validate/src/transformer.ts | 78 ++ packages/transformers/validate/src/types.ts | 62 ++ .../transformers/validate/src/validate.ts | 110 +++ packages/transformers/validate/tsconfig.json | 8 + packages/transformers/validate/tsup.config.ts | 3 + .../browser/src/__tests__/tagger.test.ts | 64 ++ .../browser/src/__tests__/walker.test.ts | 18 + packages/web/sources/browser/src/tagger.ts | 22 + skills/walkeros-understanding-flow/SKILL.md | 16 +- .../SKILL.md | 22 + .../docs/getting-started/flow/contract.mdx | 30 +- .../docs/getting-started/flow/validate.mdx | 176 ++--- website/docs/sources/web/browser/tagger.mdx | 38 + website/docs/transformers/validate.mdx | 129 ++++ website/sidebars.ts | 1 + 62 files changed, 3078 insertions(+), 317 deletions(-) create mode 100644 .changeset/transformer-validate.md create mode 100644 packages/core/src/__tests__/schemas/flow-validate-retired.test.ts create mode 100644 packages/transformers/validate/jest.config.mjs create mode 100644 packages/transformers/validate/package.json create mode 100644 packages/transformers/validate/scripts/generate-format-schema.mjs create mode 100644 packages/transformers/validate/src/__tests__/bundle-isolation.test.ts create mode 100644 packages/transformers/validate/src/__tests__/format-schema-sync.test.ts create mode 100644 packages/transformers/validate/src/__tests__/scaffold.test.ts create mode 100644 packages/transformers/validate/src/__tests__/settings.schema.test.ts create mode 100644 packages/transformers/validate/src/__tests__/stepExamples.test.ts create mode 100644 
packages/transformers/validate/src/__tests__/transformer.test.ts create mode 100644 packages/transformers/validate/src/__tests__/validate.test.ts create mode 100644 packages/transformers/validate/src/dev.ts create mode 100644 packages/transformers/validate/src/event-format.schema.ts create mode 100644 packages/transformers/validate/src/examples/index.ts create mode 100644 packages/transformers/validate/src/examples/step.ts create mode 100644 packages/transformers/validate/src/hints.ts create mode 100644 packages/transformers/validate/src/index.ts create mode 100644 packages/transformers/validate/src/schemas/index.ts create mode 100644 packages/transformers/validate/src/schemas/settings.ts create mode 100644 packages/transformers/validate/src/transformer.ts create mode 100644 packages/transformers/validate/src/types.ts create mode 100644 packages/transformers/validate/src/validate.ts create mode 100644 packages/transformers/validate/tsconfig.json create mode 100644 packages/transformers/validate/tsup.config.ts create mode 100644 website/docs/transformers/validate.mdx diff --git a/.changeset/scoped-generic-data-attribute.md b/.changeset/scoped-generic-data-attribute.md index dda489a26..0e47ce033 100644 --- a/.changeset/scoped-generic-data-attribute.md +++ b/.changeset/scoped-generic-data-attribute.md @@ -1,10 +1,12 @@ --- '@walkeros/web-source-browser': minor -'@walkeros/mcp-source-browser': patch +'@walkeros/mcp-source-browser': minor --- Add the `data-elb_` scoped generic attribute. It carries the same `key:value` properties as the blanket `data-elb-` generic, but only events whose triggered -element is nested below the `data-elb_` element receive them. Use `data-elb-` -for properties every trigger in an entity should carry, and `data-elb_` when -only triggers within a specific branch should. +element is nested below the `data-elb_` element receive them. 
The +`createTagger()` API gains a `scoped()` method and the `generate_tagging` MCP +tool gains a `scoped` input to produce it. Use `data-elb-` for properties every +trigger in an entity should carry, and `data-elb_` when only triggers within a +specific branch should. diff --git a/.changeset/transformer-validate.md b/.changeset/transformer-validate.md new file mode 100644 index 000000000..5f39be2af --- /dev/null +++ b/.changeset/transformer-validate.md @@ -0,0 +1,17 @@ +--- +'@walkeros/transformer-validate': minor +'@walkeros/core': minor +'@walkeros/cli': patch +--- + +New `@walkeros/transformer-validate` transformer validates events against JSON +Schema contracts. It runs in both web and server flows, supports strict and pass +modes, and writes the verdict and error list to configurable paths so you can +gate or observe event quality. + +The declarative per-step `validate` field on sources, transformers, and +destinations is removed. Define event shapes in the top-level `contract` and +enforce them at runtime by adding a `transformer-validate` step that references +them via `$contract.`; `format: true` still checks an event is a valid +`WalkerOS.PartialEvent`. Design-time validation now checks step examples against +the resolved contract. 
diff --git a/package-lock.json b/package-lock.json index 483ac52ad..3044040fa 100644 --- a/package-lock.json +++ b/package-lock.json @@ -4494,6 +4494,12 @@ "integrity": "sha512-9Yubnt3e8A0OKwxYSXyhLymGW4sCufcLG6VdiDdUGVkPhpqLxlvP5vl1983gQjJl3tqbrM731mjaZaP68AgosQ==", "license": "CC0-1.0" }, + "node_modules/@cfworker/json-schema": { + "version": "4.1.1", + "resolved": "https://registry.npmjs.org/@cfworker/json-schema/-/json-schema-4.1.1.tgz", + "integrity": "sha512-gAmrUZSGtKc3AiBL71iNWxDsyUC5uMaKKGdvzYsBoTW/xi42JQHl7eKV2OYzCUqvc+D2RCcf7EXY2iCyFIk6og==", + "license": "MIT" + }, "node_modules/@changesets/apply-release-plan": { "version": "7.1.1", "resolved": "https://registry.npmjs.org/@changesets/apply-release-plan/-/apply-release-plan-7.1.1.tgz", @@ -20382,6 +20388,10 @@ "resolved": "packages/transformers/ga4", "link": true }, + "node_modules/@walkeros/transformer-validate": { + "resolved": "packages/transformers/validate", + "link": true + }, "node_modules/@walkeros/walker.js": { "resolved": "apps/walkerjs", "link": true @@ -48975,6 +48985,7 @@ "@walkeros/core": "4.1.2", "@walkeros/server-core": "4.1.2", "@walkeros/server-destination-api": "4.1.2", + "@walkeros/transformer-validate": "4.1.2", "ajv": "^8.17.1", "chalk": "^5.6.2", "ci-info": "^4.4.0", @@ -48990,6 +49001,7 @@ "pacote": "^21.0.4", "picomatch": "^4.0.4", "semver": "^7.7.4", + "tar": "^7.4.0", "zod": "^4.0" }, "bin": { @@ -52030,6 +52042,22 @@ "@walkeros/server-core": "4.1.2" } }, + "packages/server/transformers/validate": { + "name": "@walkeros/server-transformer-validate", + "version": "4.1.2", + "extraneous": true, + "funding": [ + { + "type": "GitHub Sponsors", + "url": "https://github.com/sponsors/elbwalker" + } + ], + "license": "MIT", + "dependencies": { + "@cfworker/json-schema": "^4.1.1", + "@walkeros/core": "4.1.2" + } + }, "packages/transformers/demo": { "name": "@walkeros/transformer-demo", "version": "4.1.2", @@ -52049,6 +52077,18 @@ "@walkeros/core": "4.1.2" } }, + 
"packages/transformers/validate": { + "name": "@walkeros/transformer-validate", + "version": "4.1.2", + "license": "MIT", + "dependencies": { + "@cfworker/json-schema": "^4.1.1", + "@walkeros/core": "4.1.2" + }, + "devDependencies": { + "@walkeros/core": "4.1.2" + } + }, "packages/web/core": { "name": "@walkeros/web-core", "version": "4.1.2", diff --git a/packages/cli/examples/flow-complete.json b/packages/cli/examples/flow-complete.json index d70c964dc..accf5dbf8 100644 --- a/packages/cli/examples/flow-complete.json +++ b/packages/cli/examples/flow-complete.json @@ -286,17 +286,6 @@ "destinations": { "ga4": { "package": "@walkeros/web-destination-gtag", - "validate": { - "events": { - "order": { - "complete": { - "properties": { - "data": { "required": ["id", "total"] } - } - } - } - } - }, "config": { "require": ["consent", "user"], "consent": { diff --git a/packages/cli/package.json b/packages/cli/package.json index b4bf9ac77..6e194098c 100644 --- a/packages/cli/package.json +++ b/packages/cli/package.json @@ -56,6 +56,7 @@ "@walkeros/core": "4.1.2", "@walkeros/server-core": "4.1.2", "@walkeros/server-destination-api": "4.1.2", + "@walkeros/transformer-validate": "4.1.2", "ajv": "^8.17.1", "chalk": "^5.6.2", "ci-info": "^4.4.0", diff --git a/packages/cli/src/__tests__/unit/validate/flow.test.ts b/packages/cli/src/__tests__/unit/validate/flow.test.ts index 6c05b2515..8447ffab1 100644 --- a/packages/cli/src/__tests__/unit/validate/flow.test.ts +++ b/packages/cli/src/__tests__/unit/validate/flow.test.ts @@ -608,37 +608,47 @@ describe('validateFlow', () => { expect(result.valid).toBe(true); }); + }); - it('warns about contract compliance', () => { - const result = validateFlow({ - version: 4, - contract: { - default: { - events: { - page: { - view: { + describe('contract compliance (example vs resolved contract)', () => { + const contractRequiringTotal = { + default: { + events: { + order: { + complete: { + type: 'object', + properties: { + data: { type: 'object', - 
properties: { title: { type: 'string' } }, + required: ['total'], + properties: { total: { type: 'number' } }, }, }, }, }, }, + }, + }; + + it('warns when a destination example violates the contract (non-strict)', () => { + const result = validateFlow({ + version: 4, + contract: contractRequiringTotal, flows: { default: { config: { platform: 'web' }, destinations: { - gtag: { - package: '@walkeros/web-destination-gtag', + api: { + package: '@walkeros/web-destination-api', examples: { - pageview: { + order: { in: { - name: 'page view', - entity: 'page', - action: 'view', - data: { title: 'Home' }, + name: 'order complete', + entity: 'order', + action: 'complete', + data: { id: 'A1' }, // missing required `total` }, - out: ['event', 'page_view'], + out: ['event', 'purchase'], }, }, }, @@ -647,13 +657,137 @@ describe('validateFlow', () => { }, }); + // Non-strict: violation is a warning, validation stays valid. expect(result.valid).toBe(true); expect(result.warnings).toContainEqual( expect.objectContaining({ - path: 'destination.gtag.examples.pageview', - message: expect.stringContaining('contract'), + path: 'destination.api.examples.order.in', + message: expect.stringContaining('violates contract'), }), ); }); + + it('errors when a destination example violates the contract (strict)', () => { + const result = validateFlow( + { + version: 4, + contract: contractRequiringTotal, + flows: { + default: { + config: { platform: 'web' }, + destinations: { + api: { + package: '@walkeros/web-destination-api', + examples: { + order: { + in: { + name: 'order complete', + entity: 'order', + action: 'complete', + data: { id: 'A1' }, + }, + out: ['event', 'purchase'], + }, + }, + }, + }, + }, + }, + }, + { strict: true }, + ); + + expect(result.valid).toBe(false); + expect(result.errors).toContainEqual( + expect.objectContaining({ + path: 'destination.api.examples.order.in', + code: 'CONTRACT_VIOLATION', + }), + ); + }); + + it('does not flag a compliant example', () => { + const 
result = validateFlow({ + version: 4, + contract: contractRequiringTotal, + flows: { + default: { + config: { platform: 'web' }, + destinations: { + api: { + package: '@walkeros/web-destination-api', + examples: { + order: { + in: { + name: 'order complete', + entity: 'order', + action: 'complete', + data: { total: 9.99 }, + }, + out: ['event', 'purchase'], + }, + }, + }, + }, + }, + }, + }); + + expect(result.valid).toBe(true); + expect( + result.warnings.some((w) => w.path.includes('destination.api')), + ).toBe(false); + expect(result.errors.some((e) => e.code === 'CONTRACT_VIOLATION')).toBe( + false, + ); + }); + + const uncoveredEventFlow = { + version: 4, + contract: contractRequiringTotal, + flows: { + default: { + config: { platform: 'web' }, + destinations: { + api: { + package: '@walkeros/web-destination-api', + examples: { + page: { + in: { + name: 'page view', + entity: 'page', + action: 'view', + data: { title: 'Home' }, + }, + out: ['event', 'page_view'], + }, + }, + }, + }, + }, + }, + } as const; + + it('produces no diagnostic when an example matches no contract entry', () => { + const result = validateFlow(uncoveredEventFlow); + + expect(result.valid).toBe(true); + expect( + result.warnings.some((w) => w.path.includes('destination.api')), + ).toBe(false); + expect(result.errors.some((e) => e.code === 'CONTRACT_VIOLATION')).toBe( + false, + ); + }); + + it('does not fail --strict on an uncovered event type', () => { + const result = validateFlow(uncoveredEventFlow, { strict: true }); + + expect(result.valid).toBe(true); + expect(result.errors).toHaveLength(0); + expect( + result.warnings.some((w) => w.path.includes('destination.api')), + ).toBe(false); + }); }); }); diff --git a/packages/cli/src/commands/validate/index.ts b/packages/cli/src/commands/validate/index.ts index ebff9f4d9..68dbd2814 100644 --- a/packages/cli/src/commands/validate/index.ts +++ b/packages/cli/src/commands/validate/index.ts @@ -31,7 +31,7 @@ import type { export async 
function validate( type: ValidationType, input: unknown, - options: { flow?: string; path?: string } = {}, + options: { flow?: string; path?: string; strict?: boolean } = {}, ): Promise { // Resolve string inputs (file paths, URLs, JSON strings) to parsed objects let resolved = input; @@ -53,7 +53,10 @@ export async function validate( case 'event': return validateEvent(resolved); case 'flow': - return validateFlow(resolved, { flow: options.flow }); + return validateFlow(resolved, { + flow: options.flow, + strict: options.strict, + }); case 'mapping': return validateMapping(resolved); default: @@ -141,6 +144,7 @@ export async function validateCommand( const result = await validate(options.type, input, { flow: options.flow, path: options.path, + strict: options.strict, }); // Format and write result diff --git a/packages/cli/src/commands/validate/validators/flow.ts b/packages/cli/src/commands/validate/validators/flow.ts index d3157b5ac..64d1f49b9 100644 --- a/packages/cli/src/commands/validate/validators/flow.ts +++ b/packages/cli/src/commands/validate/validators/flow.ts @@ -1,8 +1,14 @@ // walkerOS/packages/cli/src/commands/validate/validators/flow.ts -import type { Flow } from '@walkeros/core'; -import { getFlowSettings, isObject, validateStepEntry } from '@walkeros/core'; +import type { Flow, WalkerOS } from '@walkeros/core'; +import { + getFlowSettings, + isObject, + resolveContracts, + validateStepEntry, +} from '@walkeros/core'; import { schemas } from '@walkeros/core/dev'; +import { validateEventAgainstContract } from '@walkeros/transformer-validate'; import type { ValidateResult, ValidationError, @@ -13,6 +19,8 @@ const { validateFlowConfig } = schemas; interface FlowValidateOptions { flow?: string; + /** When true, contract violations are reported as errors instead of warnings. 
*/ + strict?: boolean; } /** @@ -179,7 +187,13 @@ export function validateFlow( // Contract compliance (contracts live on Config level only) const contract = input.contract; if (contract) { - checkContractCompliance(flowSettings, contract, warnings); + checkContractCompliance( + flowSettings, + contract, + errors, + warnings, + options.strict === true, + ); } } details.connectionsChecked = totalConnections; @@ -633,44 +647,82 @@ function lintRoute( // Bare gate: nothing else to recurse into. } +/** + * Validate each step example against the flow's resolved top-level contract. + * + * - A contract violation → error when {@link strict}, else warning. + * - An entity.action with no matching contract entry produces NO diagnostic: + * the shared {@link validateEventAgainstContract} authority treats no-match + * as "no opinion = pass", and so do we. Emitting a warning here would wrongly + * fail `walkeros validate --strict` for any event type the contract simply + * does not cover. + * + * Only the canonical event INPUTS are validated: `destination.in` and + * `transformer.in`. A source example's `in` is RAW input (an HTTP request, a + * dataLayer array, an HTML string), not a canonical walkerOS event, so + * validating it against the event contract would be semantically wrong. + * Source-input validation (per-package input formats) and canonical-output + * validation (`source.out`/`transformer.out`, which are `StepOut` effect + * tuples, not events) are deferred to v2. + * + * The verdict comes from the shared {@link validateEventAgainstContract} + * authority so design-time and runtime stay in lockstep. 
+ */ function checkContractCompliance( config: Flow, contract: Flow.Contract, + errors: ValidationError[], warnings: ValidationWarning[], + strict: boolean, ): void { - for (const [name, dest] of Object.entries(config.destinations || {})) { - if (!dest.examples) continue; - - for (const [exName, example] of Object.entries(dest.examples)) { - if (!example.in || typeof example.in !== 'object') continue; - - const event = example.in as { entity?: string; action?: string }; - if (!event.entity || !event.action) continue; + // Resolve extend chains + wildcards once; each rule is a ContractSource. + const resolved = resolveContracts(contract); + const rules = Object.values(resolved); + if (rules.length === 0) return; + + const checkExample = (path: string, candidate: unknown): void => { + if (!isObject(candidate)) return; + + const entity = + typeof candidate.entity === 'string' ? candidate.entity : undefined; + const action = + typeof candidate.action === 'string' ? candidate.action : undefined; + if (!entity || !action) return; + + const event: WalkerOS.DeepPartialEvent = candidate; + const result = validateEventAgainstContract(event, undefined, { + contracts: rules, + }); + // No-match returns isValid:true (no opinion = pass), so an uncovered + // event type produces no diagnostic, matching runtime semantics. + if (result.isValid) return; - // Walk every named contract rule and look in its events map. - // First match (entity exact or wildcard, action exact or wildcard) wins. 
- let matched = false; - for (const rule of Object.values(contract)) { - const events = rule.events; - if (!events) continue; + const message = `Example violates contract: ${result.errors + .map((e) => `${e.path || '/'}: ${e.message}`) + .join('; ')}`; - const entityActions = events[event.entity] || events['*']; - if (!entityActions) continue; + if (strict) { + errors.push({ path, message, code: 'CONTRACT_VIOLATION' }); + } else { + warnings.push({ + path, + message, + suggestion: 'Fix the example data to satisfy the contract schema.', + }); + } + }; - const actionSchema = entityActions[event.action] || entityActions['*']; - if (actionSchema) { - matched = true; - break; - } - } + for (const [name, dest] of Object.entries(config.destinations || {})) { + if (!dest.examples) continue; + for (const [exName, example] of Object.entries(dest.examples)) { + checkExample(`destination.${name}.examples.${exName}.in`, example.in); + } + } - if (matched) { - warnings.push({ - path: `destination.${name}.examples.${exName}`, - message: `Example has contract for ${event.entity}.${event.action}`, - suggestion: 'Verify example data matches contract schema', - }); - } + for (const [name, transformer] of Object.entries(config.transformers || {})) { + if (!transformer.examples) continue; + for (const [exName, example] of Object.entries(transformer.examples)) { + checkExample(`transformer.${name}.examples.${exName}.in`, example.in); } } } diff --git a/packages/core/src/__tests__/contract.test.ts b/packages/core/src/__tests__/contract.test.ts index bb6ede3bd..41b83d446 100644 --- a/packages/core/src/__tests__/contract.test.ts +++ b/packages/core/src/__tests__/contract.test.ts @@ -285,6 +285,43 @@ describe('resolveContracts', () => { }); }); + it('preserves event-level annotations when stripAnnotations is false', () => { + const contract: Flow.Contract = { + web: { + events: { + product: { + view: { + description: 'Product viewed', + properties: { + data: { + type: 'object', + 
properties: { + id: { type: 'string', description: 'The SKU' }, + }, + }, + }, + }, + }, + }, + }, + }; + + // Default path still strips annotations (AJV-clean). + const stripped = resolveContracts(contract); + expect(stripped.web.events?.product.view).not.toHaveProperty('description'); + + // Annotation-preserving view keeps descriptions for IntelliSense. + const annotated = resolveContracts(contract, { stripAnnotations: false }); + expect(annotated.web.events?.product.view).toMatchObject({ + description: 'Product viewed', + properties: { + data: { + properties: { id: { type: 'string', description: 'The SKU' } }, + }, + }, + }); + }); + it('should handle contract with only schema, no events', () => { const contract: Flow.Contract = { consent_only: { diff --git a/packages/core/src/__tests__/flow.test.ts b/packages/core/src/__tests__/flow.test.ts index 3837aa8aa..ac156dd6c 100644 --- a/packages/core/src/__tests__/flow.test.ts +++ b/packages/core/src/__tests__/flow.test.ts @@ -2143,6 +2143,47 @@ describe('$contract edge cases', () => { }, }); }); + + test('$contract.web resolves inside transformer config.settings.contract', () => { + const setup: Flow.Json = { + version: 4, + contract: { + web: { + events: { + page: { view: { required: ['data'] } }, + }, + }, + }, + flows: { + default: { + config: { platform: 'web' }, + transformers: { + validate: { + package: '@walkeros/transformer-validate', + config: { + settings: { + contract: ['$contract.web', { type: 'object' }], + }, + }, + }, + }, + }, + }, + }; + const config = getFlowSettings(setup); + + // The whole-string `$contract.web` resolves to the RESOLVED ContractRule + // (wildcards expanded, NOT the literal string), and the inline sibling + // passes through untouched. 
+ expect(config.transformers?.validate?.config).toEqual({ + settings: { + contract: [ + { events: { page: { view: { required: ['data'] } } } }, + { type: 'object' }, + ], + }, + }); + }); }); describe('$contract reference resolution', () => { diff --git a/packages/core/src/__tests__/schemas/flow-validate-retired.test.ts b/packages/core/src/__tests__/schemas/flow-validate-retired.test.ts new file mode 100644 index 000000000..4947b0e6e --- /dev/null +++ b/packages/core/src/__tests__/schemas/flow-validate-retired.test.ts @@ -0,0 +1,19 @@ +import { + SourceSchema, + TransformerSchema, + DestinationSchema, +} from '../../schemas/flow'; + +describe('retired per-step validate field', () => { + it.each([ + ['Source', SourceSchema], + ['Transformer', TransformerSchema], + ['Destination', DestinationSchema], + ] as const)('%s: strips a top-level validate field', (_name, schema) => { + const parsed = schema.parse({ + package: '@walkeros/x', + validate: { format: true }, + }); + expect('validate' in parsed).toBe(false); + }); +}); diff --git a/packages/core/src/__tests__/schemas/validate-flow-config.test.ts b/packages/core/src/__tests__/schemas/validate-flow-config.test.ts index 44539bb7f..1b72c0be2 100644 --- a/packages/core/src/__tests__/schemas/validate-flow-config.test.ts +++ b/packages/core/src/__tests__/schemas/validate-flow-config.test.ts @@ -237,10 +237,56 @@ describe('validateFlowConfig', () => { ); const result = validateFlowConfig(json); expect(result.context?.contract).toEqual([ - { entity: 'page', actions: ['view', 'read'] }, + { + entity: 'page', + actions: ['view', 'read'], + properties: { view: {}, read: {} }, + }, ]); }); + it('returns context contract with typed property info and descriptions', () => { + const json = JSON.stringify( + { + version: 4, + contract: { + default: { + events: { + order: { + complete: { + type: 'object', + properties: { + data: { + type: 'object', + required: ['total'], + properties: { + total: { + type: 'number', + description: 
'Order total in EUR', + }, + }, + }, + }, + }, + }, + }, + }, + }, + flows: { default: { config: { platform: 'server' } } }, + }, + null, + 2, + ); + const result = validateFlowConfig(json); + const order = result.context?.contract?.find((c) => c.entity === 'order'); + expect(order?.actions).toEqual(['complete']); + expect(order?.properties?.complete?.total).toEqual({ + type: 'number', + description: 'Order total in EUR', + required: true, + }); + }); + it('returns empty context for invalid JSON', () => { const result = validateFlowConfig('{'); expect(result.context).toBeUndefined(); diff --git a/packages/core/src/__tests__/step-entry.test.ts b/packages/core/src/__tests__/step-entry.test.ts index bd470c381..c9222fc7b 100644 --- a/packages/core/src/__tests__/step-entry.test.ts +++ b/packages/core/src/__tests__/step-entry.test.ts @@ -188,6 +188,19 @@ describe('validateStepEntry — kind-specific', () => { expect(r.ok).toBe(false); expect(r.code).toBe('UNKNOWN_KEY'); }); + + it.each(['Source', 'Transformer', 'Destination', 'Store'] as const)( + '%s: rejects retired `validate` key (UNKNOWN_KEY)', + (kind) => { + const r = validateStepEntry( + { package: '@walkeros/x', validate: {} }, + kind, + ); + expect(r.ok).toBe(false); + expect(r.code).toBe('UNKNOWN_KEY'); + expect(r.key).toBe('validate'); + }, + ); }); describe('isPathStepEntry', () => { diff --git a/packages/core/src/contract.ts b/packages/core/src/contract.ts index acdbd624d..f1ec43726 100644 --- a/packages/core/src/contract.ts +++ b/packages/core/src/contract.ts @@ -9,16 +9,33 @@ const ANNOTATION_KEYS = new Set([ '$comment', ]); +/** Options for {@link resolveContracts}. */ +export interface ResolveContractsOptions { + /** + * When true (default), annotation keys (`description`, `examples`, `title`, + * `$comment`) are stripped from event schemas so the result is AJV-clean for + * runtime validation. Set to false to keep annotations (e.g. for IntelliSense + * that surfaces property descriptions). 
+ */ + stripAnnotations?: boolean; +} + /** * Resolve all named contracts: process extend chains, expand wildcards, * strip annotations from event schemas. * * Returns a fully resolved map where each contract entry has inherited * properties merged in and wildcards expanded into concrete actions. + * + * By default annotations are stripped (AJV-clean). Pass + * `{ stripAnnotations: false }` to preserve `description`/`examples`/`title` + * on event schemas. */ export function resolveContracts( contracts: Flow.Contract, + options?: ResolveContractsOptions, ): Record { + const strip = options?.stripAnnotations !== false; const resolved: Record = {}; const resolving = new Set(); // Circular detection @@ -56,8 +73,9 @@ export function resolveContracts( result.events = expandWildcards(result.events); } - // 3. Strip annotations from event schemas (not from section schemas) - if (result.events) { + // 3. Strip annotations from event schemas (not from section schemas). + // Skipped when stripAnnotations is false (annotation-preserving view). 
+ if (result.events && strip) { const stripped: Flow.ContractEvents = {}; for (const [entity, actions] of Object.entries(result.events)) { stripped[entity] = {}; diff --git a/packages/core/src/flow.ts b/packages/core/src/flow.ts index 9338f4e6f..cb96fd6bd 100644 --- a/packages/core/src/flow.ts +++ b/packages/core/src/flow.ts @@ -605,7 +605,6 @@ function resolveFlowSettings( before: source.before, next: source.next, cache: source.cache, - validate: source.validate, code: source.code, } as Flow.Source; } @@ -645,7 +644,6 @@ function resolveFlowSettings( before: dest.before, next: dest.next, cache: dest.cache, - validate: dest.validate, code: dest.code, } as Flow.Destination; } @@ -722,7 +720,6 @@ function resolveFlowSettings( before: transformer.before, next: transformer.next, cache: transformer.cache, - validate: transformer.validate, code: transformer.code, } as Flow.Transformer; } diff --git a/packages/core/src/schemas/__tests__/validate-json-schema.test.ts b/packages/core/src/schemas/__tests__/validate-json-schema.test.ts index 53b998049..abc962976 100644 --- a/packages/core/src/schemas/__tests__/validate-json-schema.test.ts +++ b/packages/core/src/schemas/__tests__/validate-json-schema.test.ts @@ -1,38 +1,17 @@ -import { validateJsonSchema, validateEventsJsonSchema } from '../flow'; +import { validateEventsJsonSchema } from '../flow'; import { schemas } from '../../dev'; describe('validate JSON Schema exports', () => { - test('validateJsonSchema describes the step validate object', () => { - expect(validateJsonSchema).toMatchObject({ - type: 'object', - title: 'Validate', - properties: { - format: expect.anything(), - events: expect.anything(), - schema: expect.anything(), - }, - }); - }); - test('validateEventsJsonSchema is a generated JSON Schema object', () => { expect(validateEventsJsonSchema).toBeDefined(); expect(typeof validateEventsJsonSchema).toBe('object'); }); - - test('validateJsonSchema carries field descriptions', () => { - const { format, schema } 
= validateJsonSchema.properties ?? {}; - expect(typeof format).toBe('object'); - expect(typeof schema).toBe('object'); - if (typeof format === 'object') expect(format.description).toBeTruthy(); - if (typeof schema === 'object') expect(schema.description).toBeTruthy(); - }); }); describe('direct schema export promotion', () => { - test('validate schemas are reachable as schemas.X', () => { - expect(schemas.ValidateSchema).toBeDefined(); + test('validate-events schemas are reachable as schemas.X', () => { expect(schemas.ValidateEventsSchema).toBeDefined(); - expect(schemas.validateJsonSchema).toBeDefined(); + expect(schemas.validateEventsJsonSchema).toBeDefined(); }); test('no-many route schema is reachable as schemas.X', () => { diff --git a/packages/core/src/schemas/flow.ts b/packages/core/src/schemas/flow.ts index 265654417..a62a21657 100644 --- a/packages/core/src/schemas/flow.ts +++ b/packages/core/src/schemas/flow.ts @@ -253,28 +253,6 @@ export const ValidateEventsSchema = z }) .describe('Entity-action keyed JSON Schemas'); -/** - * Validate schema - step-level validation configuration. - */ -export const ValidateSchema = z - .object({ - format: z - .boolean() - .optional() - .describe('Validate event structure against the standard event format'), - events: ValidateEventsSchema.optional().describe( - 'Per entity-action JSON Schemas to validate matching events against', - ), - schema: JsonSchemaSchema.optional().describe( - 'A single JSON Schema applied to every event this step handles', - ), - }) - .meta({ - id: 'Validate', - title: 'Validate', - description: 'Step-level validation: { format?, events?, schema? 
}', - }); - // ======================================== // Source / Destination / Transformer / Store Schemas // ======================================== @@ -382,7 +360,6 @@ export const SourceSchema = z cache: EventCacheSchema.optional().describe( 'Cache configuration for this source (match → key → ttl rules)', ), - validate: ValidateSchema.optional(), }) .meta({ id: 'FlowSource', @@ -451,7 +428,6 @@ export const TransformerSchema = z cache: EventCacheSchema.optional().describe( 'Cache configuration for this transformer (match → key → ttl rules)', ), - validate: ValidateSchema.optional(), }) .meta({ id: 'FlowTransformer', @@ -527,7 +503,6 @@ export const DestinationSchema = z cache: EventCacheSchema.optional().describe( 'Cache configuration for this destination (match → key → ttl rules)', ), - validate: ValidateSchema.optional(), }) .meta({ id: 'FlowDestination', @@ -949,11 +924,6 @@ export const contractRuleJsonSchema = toJsonSchema( */ export const contractJsonSchema = toJsonSchema(ContractSchema, 'Contract'); -/** - * JSON Schema for the step-level Validate config (Flow.Validate). - */ -export const validateJsonSchema = toJsonSchema(ValidateSchema, 'Validate'); - /** * JSON Schema for the entity-action keyed Validate events map * (Flow.ValidateEvents). Shares its shape with Contract `events`. 
diff --git a/packages/core/src/schemas/index.ts b/packages/core/src/schemas/index.ts index 8a7a2043b..407e4c694 100644 --- a/packages/core/src/schemas/index.ts +++ b/packages/core/src/schemas/index.ts @@ -161,7 +161,6 @@ export { ContractActionsSchema, ContractSchemaEntry, ContractRuleSchema, - ValidateSchema, ValidateEventsSchema, SourceSchema as FlowSourceSchema, DestinationSchema as FlowDestinationSchema, @@ -184,7 +183,6 @@ export { transformerJsonSchema, contractRuleJsonSchema, contractJsonSchema, - validateJsonSchema, validateEventsJsonSchema, } from './flow'; diff --git a/packages/core/src/schemas/intellisense.ts b/packages/core/src/schemas/intellisense.ts index 69c9d9d94..8a6dcf40d 100644 --- a/packages/core/src/schemas/intellisense.ts +++ b/packages/core/src/schemas/intellisense.ts @@ -19,6 +19,19 @@ export interface IntelliSenseContext { contract?: Array<{ entity: string; actions: string[]; + /** + * Per-action `data` property info derived from the resolved contract + * schema. Keyed by action, each entry maps a `data` property name to its + * type, description, and whether it is required. Empty object when the + * action schema declares no `data` properties. + */ + properties?: Record< + string, + Record< + string, + { type?: string; description?: string; required?: boolean } + > + >; }>; packages?: PackageInfo[]; platform?: 'web' | 'server'; diff --git a/packages/core/src/schemas/validate-flow-config.ts b/packages/core/src/schemas/validate-flow-config.ts index 5e13facdd..64f639c8b 100644 --- a/packages/core/src/schemas/validate-flow-config.ts +++ b/packages/core/src/schemas/validate-flow-config.ts @@ -1,6 +1,8 @@ import { JsonSchema } from './flow'; import type { ValidationIssue, ValidationResult } from './validate'; import type { IntelliSenseContext, PackageInfo } from './intellisense'; +import type { Flow } from '../types'; +import { resolveContracts } from '../contract'; /** * Validate a Flow.Config JSON string. 
@@ -92,7 +94,7 @@ function extractContext( const transformers: string[] = []; const stores: string[] = []; const packages: PackageInfo[] = []; - const contractEntities: Array<{ entity: string; actions: string[] }> = []; + const contractEntities: ContractEntity[] = []; let platform: 'web' | 'server' | undefined; const flowNames = Object.keys(parsed.flows); @@ -466,28 +468,86 @@ function mergeVars(target: Record, source: unknown): void { } } +type ContractEntity = NonNullable[number]; +type ContractProperty = { + type?: string; + description?: string; + required?: boolean; +}; + +/** + * Derive per-action `data` property info from a resolved action schema. + * + * Event contract schemas describe the full event; the user-authored payload + * lives under `properties.data`. We surface those `data` property names with + * their type/description (for IntelliSense) and whether they are required. + */ +function extractActionProperties( + schema: unknown, +): Record { + const result: Record = {}; + if (!isObject(schema)) return result; + + const data = isObject(schema.properties) ? schema.properties.data : undefined; + if (!isObject(data)) return result; + + const required = Array.isArray(data.required) + ? 
data.required.filter((r): r is string => typeof r === 'string') + : []; + + const props = data.properties; + if (!isObject(props)) return result; + + for (const [name, propSchema] of Object.entries(props)) { + const info: ContractProperty = {}; + if (isObject(propSchema)) { + if (typeof propSchema.type === 'string') info.type = propSchema.type; + if (typeof propSchema.description === 'string') + info.description = propSchema.description; + } + if (required.includes(name)) info.required = true; + result[name] = info; + } + + return result; +} + function extractContractEntities( - target: Array<{ entity: string; actions: string[] }>, + target: ContractEntity[], contract: unknown, ): void { if (!isObject(contract)) return; - // Named contracts: iterate each named entry - for (const [, entry] of Object.entries(contract)) { - if (!isObject(entry)) continue; + // Resolve extend chains + wildcards with annotations preserved, so property + // descriptions survive for IntelliSense. Resolution can throw on malformed + // contracts (circular extend, unknown ref); the surrounding context + // extraction is best-effort, so skip contract entities on failure. 
+ let resolved: Record; + try { + resolved = resolveContracts(contract as Flow.Contract, { + stripAnnotations: false, + }); + } catch { + return; + } + + // Named contracts: iterate each resolved named entry + for (const entry of Object.values(resolved)) { const events = entry.events; if (!isObject(events)) continue; for (const [entity, actions] of Object.entries(events)) { if (!isObject(actions)) continue; - const existing = target.find((e) => e.entity === entity); - const actionNames = Object.keys(actions); - if (existing) { - for (const a of actionNames) { - if (!existing.actions.includes(a)) existing.actions.push(a); - } - } else { - target.push({ entity, actions: actionNames }); + let existing = target.find((e) => e.entity === entity); + if (!existing) { + existing = { entity, actions: [], properties: {} }; + target.push(existing); + } + if (!existing.properties) existing.properties = {}; + + for (const [action, schema] of Object.entries(actions)) { + if (!existing.actions.includes(action)) existing.actions.push(action); + existing.properties[action] = extractActionProperties(schema); } } } diff --git a/packages/core/src/step-entry.ts b/packages/core/src/step-entry.ts index 96c37aeb6..c24377085 100644 --- a/packages/core/src/step-entry.ts +++ b/packages/core/src/step-entry.ts @@ -43,7 +43,6 @@ export const STEP_OPERATIVE_FIELDS: Record = { const COMMON_NON_OPERATIVE = [ 'config', 'env', - 'validate', 'variables', 'examples', 'disabled', diff --git a/packages/core/src/types/destination.ts b/packages/core/src/types/destination.ts index 92fb0a774..52ca5b510 100644 --- a/packages/core/src/types/destination.ts +++ b/packages/core/src/types/destination.ts @@ -202,7 +202,6 @@ export type Init = { next?: Transformer.Route; cache?: import('./cache').Cache; state?: import('./state').State | import('./state').State[]; - validate?: import('./validate').Validate; }; export interface InitDestinations { diff --git a/packages/core/src/types/flow.ts 
b/packages/core/src/types/flow.ts index 598666374..b21442086 100644 --- a/packages/core/src/types/flow.ts +++ b/packages/core/src/types/flow.ts @@ -40,7 +40,7 @@ import type { Collector } from '.'; import type { Cache, EventCacheRule, StoreCacheRule } from './cache'; import type { Route } from './transformer'; -import type { Validate, ValidateEvents, JsonSchema } from './validate'; +import type { ValidateEvents, JsonSchema } from './validate'; /** * Single flow configuration. @@ -413,7 +413,6 @@ export namespace Flow { /** Cache configuration for this source. */ cache?: Cache; - validate?: Validate; /** * Source-level variables (highest priority in cascade). @@ -489,7 +488,6 @@ export namespace Flow { /** Cache configuration for this destination. */ cache?: Cache; - validate?: Validate; /** Destination-level variables (highest priority in cascade). */ variables?: Variables; @@ -559,7 +557,6 @@ export namespace Flow { /** Cache configuration for this transformer. */ cache?: Cache; - validate?: Validate; /** Transformer-level variables (highest priority in cascade). */ variables?: Variables; diff --git a/packages/core/src/types/index.ts b/packages/core/src/types/index.ts index fe1a3271e..05b8a5cb2 100644 --- a/packages/core/src/types/index.ts +++ b/packages/core/src/types/index.ts @@ -44,7 +44,7 @@ export type { SendDataValue, SendHeaders, SendResponse } from './send'; export type { Ingest, IngestMeta } from './ingest'; export { createIngest } from './ingest'; -export type { JsonSchema, ValidateEvents, Validate } from './validate'; +export type { JsonSchema, ValidateEvents } from './validate'; // Telemetry FlowState shape, surfaced for observers to consume. 
export type { diff --git a/packages/core/src/types/source.ts b/packages/core/src/types/source.ts index 6b525cc3d..8e2ebf8b9 100644 --- a/packages/core/src/types/source.ts +++ b/packages/core/src/types/source.ts @@ -247,7 +247,6 @@ export type InitSource = { before?: Route; cache?: import('./cache').Cache; state?: import('./state').State | import('./state').State[]; - validate?: import('./validate').Validate; }; /** diff --git a/packages/core/src/types/transformer.ts b/packages/core/src/types/transformer.ts index b8ebabdf7..6400a102d 100644 --- a/packages/core/src/types/transformer.ts +++ b/packages/core/src/types/transformer.ts @@ -260,7 +260,6 @@ export type InitTransformer = { cache?: import('./cache').Cache; state?: import('./state').State | import('./state').State[]; mapping?: MappingConfig; - validate?: import('./validate').Validate; }; /** diff --git a/packages/core/src/types/validate.ts b/packages/core/src/types/validate.ts index 11d921843..b333821b4 100644 --- a/packages/core/src/types/validate.ts +++ b/packages/core/src/types/validate.ts @@ -10,18 +10,3 @@ export type JsonSchema = Record; * Wildcard fallback semantic: entity.action → entity.* → *.action → *.*. */ export type ValidateEvents = Record>; - -/** - * Step-level validation primitive. Declares validation intent for a source, - * transformer, or destination: a `format` toggle, entity-action keyed schemas, - * and/or a generic JSON Schema for the full input. Declarative only, - * consumers (CLI tooling, MCP, custom runners) decide whether and how to enforce. - */ -export interface Validate { - /** Validate the full `WalkerOS.Event` structural shape. */ - format?: boolean; - /** Entity-action keyed JSON Schemas. */ - events?: ValidateEvents; - /** Generic JSON Schema for the full input. 
*/ - schema?: JsonSchema; -} diff --git a/packages/mcps/source-browser/src/__tests__/tools/generate.test.ts b/packages/mcps/source-browser/src/__tests__/tools/generate.test.ts index 13e0f44c3..a4628a600 100644 --- a/packages/mcps/source-browser/src/__tests__/tools/generate.test.ts +++ b/packages/mcps/source-browser/src/__tests__/tools/generate.test.ts @@ -102,6 +102,16 @@ describe('generate_tagging tool', () => { expect(result.structuredContent.attributes['data-track']).toBe('item'); }); + it('generates scoped generic', async () => { + const tool = server.getTool('generate_tagging'); + const result = await tool.handler({ + scoped: { size: 'L', color: 'red' }, + }); + expect(result.structuredContent.attributes['data-elb_']).toBe( + 'size:L;color:red', + ); + }); + it('returns error when no parameters provided', async () => { const tool = server.getTool('generate_tagging'); const result = await tool.handler({}); diff --git a/packages/mcps/source-browser/src/tools/generate.ts b/packages/mcps/source-browser/src/tools/generate.ts index 7ebc6cd52..77c0f79a3 100644 --- a/packages/mcps/source-browser/src/tools/generate.ts +++ b/packages/mcps/source-browser/src/tools/generate.ts @@ -41,6 +41,12 @@ export function registerGenerateTool(server: McpServer) { .record(z.string(), z.union([z.string(), z.number(), z.boolean()])) .optional() .describe('Global properties for data-elbglobals'), + scoped: z + .record(z.string(), z.union([z.string(), z.number(), z.boolean()])) + .optional() + .describe( + 'Path-scoped generic properties for data-elb_ (only triggers nested below this element receive them)', + ), link: z .record(z.string(), z.string()) .optional() @@ -67,12 +73,20 @@ export function registerGenerateTool(server: McpServer) { actions, context, globals, + scoped, link, prefix, }) => { try { const hasInput = - entity || data || action || actions || context || globals || link; + entity || + data || + action || + actions || + context || + globals || + scoped || + link; if 
(!hasInput) { return mcpError( new Error( @@ -90,6 +104,7 @@ export function registerGenerateTool(server: McpServer) { if (actions) t.actions(actions); if (context) t.context(context); if (globals) t.globals(globals); + if (scoped) t.scoped(scoped); if (link) t.link(link); const attributes = t.get(); diff --git a/packages/transformers/validate/jest.config.mjs b/packages/transformers/validate/jest.config.mjs new file mode 100644 index 000000000..d1e06ddf6 --- /dev/null +++ b/packages/transformers/validate/jest.config.mjs @@ -0,0 +1,5 @@ +import baseConfig from '@walkeros/config/jest'; + +const config = {}; + +export default { ...baseConfig, ...config }; diff --git a/packages/transformers/validate/package.json b/packages/transformers/validate/package.json new file mode 100644 index 000000000..419da6783 --- /dev/null +++ b/packages/transformers/validate/package.json @@ -0,0 +1,67 @@ +{ + "name": "@walkeros/transformer-validate", + "description": "JSON Schema contract validation transformer for walkerOS", + "version": "4.1.2", + "license": "MIT", + "main": "./dist/index.js", + "module": "./dist/index.mjs", + "types": "./dist/index.d.ts", + "exports": { + ".": { + "types": "./dist/index.d.ts", + "import": "./dist/index.mjs", + "require": "./dist/index.js" + }, + "./walkerOS.json": "./dist/walkerOS.json", + "./dev": { + "types": "./dist/dev.d.ts", + "import": "./dist/dev.mjs", + "require": "./dist/dev.js" + } + }, + "files": [ + "dist/**", + "CHANGELOG.md" + ], + "scripts": { + "build": "tsup --silent", + "clean": "rm -rf .turbo && rm -rf dist", + "generate:format-schema": "node scripts/generate-format-schema.mjs", + "dev": "jest --watchAll --colors", + "typecheck": "tsc --noEmit", + "lint": "eslint \"**/*.ts*\"", + "test": "jest" + }, + "dependencies": { + "@walkeros/core": "4.1.2", + "@cfworker/json-schema": "^4.1.1" + }, + "devDependencies": { + "@walkeros/core": "4.1.2" + }, + "repository": { + "url": "git+https://github.com/elbwalker/walkerOS.git", + "directory": 
"packages/transformers/validate" + }, + "author": "elbwalker ", + "homepage": "https://github.com/elbwalker/walkerOS#readme", + "bugs": { + "url": "https://github.com/elbwalker/walkerOS/issues" + }, + "walkerOS": { + "type": "transformer", + "docs": "https://www.walkeros.io/docs/transformers/validate", + "platform": [ + "server", + "web" + ] + }, + "keywords": [ + "walkerOS", + "walkerOS-transformer", + "transformer", + "validate", + "json-schema", + "contract" + ] +} diff --git a/packages/transformers/validate/scripts/generate-format-schema.mjs b/packages/transformers/validate/scripts/generate-format-schema.mjs new file mode 100644 index 000000000..75a4ad7c0 --- /dev/null +++ b/packages/transformers/validate/scripts/generate-format-schema.mjs @@ -0,0 +1,41 @@ +// Build-time generator for src/event-format.schema.ts. +// +// Imports the canonical partialEventJsonSchema from @walkeros/core/dev (which +// is derived from the zod EventSchema). This import is BUILD time only: zod is +// fine here. The emitted .ts is a pure plain-object literal with ZERO zod +// tokens, so the runtime bundle never pulls the zod graph. +// +// Requires @walkeros/core to be built (dist present). +// Run: npm run generate:format-schema + +import { writeFileSync } from 'node:fs'; +import { dirname, resolve } from 'node:path'; +import { fileURLToPath } from 'node:url'; +import { schemas } from '@walkeros/core/dev'; + +const here = dirname(fileURLToPath(import.meta.url)); +const outPath = resolve(here, '../src/event-format.schema.ts'); + +const schema = schemas.partialEventJsonSchema; + +const header = [ + '// GENERATED by scripts/generate-format-schema.mjs: DO NOT EDIT.', + '// Source: @walkeros/core partialEventJsonSchema (canonical zod EventSchema).', + '// `format: true` validates that the value is a valid WalkerOS.PartialEvent:', + '// the canonical event structure with all fields optional, so it checks shape', + '// and field types, not presence. 
Required-field enforcement is the contract', + "// arm's job (events/schema with required).", + '// Regenerate: npm run generate:format-schema', +].join('\n'); + +const body = + `${header}\n\n` + + `export const eventFormatSchema = Object.freeze(${JSON.stringify( + schema, + null, + 2, + )} as const);\n`; + +writeFileSync(outPath, body, 'utf8'); +// eslint-disable-next-line no-console +console.log(`Wrote ${outPath}`); diff --git a/packages/transformers/validate/src/__tests__/bundle-isolation.test.ts b/packages/transformers/validate/src/__tests__/bundle-isolation.test.ts new file mode 100644 index 000000000..462eba388 --- /dev/null +++ b/packages/transformers/validate/src/__tests__/bundle-isolation.test.ts @@ -0,0 +1,185 @@ +import * as fs from 'node:fs'; +import * as os from 'node:os'; +import * as path from 'node:path'; +import * as zlib from 'node:zlib'; +import { build } from 'esbuild'; + +/** + * The JSON Schema engine (@cfworker/json-schema) is a dependency of THIS + * transformer only. It must never leak into @walkeros/core or + * @walkeros/collector, and it must be tree-shaken out of any flow that does not + * use the validate transformer. + * + * These tests assert that isolation property: + * - negative guards: cfworker is absent from core/collector deps and source. + * - positive: a fingerprint-only bundle excludes the engine; a validate bundle + * includes it, and the engine stays small (< 20KB gzipped). + */ + +// __tests__ -> src -> validate -> transformers -> packages -> root +const repoRoot = path.resolve(__dirname, '../../../../..'); +const nodeModules = path.join(repoRoot, 'node_modules'); + +const CFWORKER_PATTERN = /cfworker|class Validator/; + +/** + * Zod must NEVER reach the runtime bundle. The canonical event schema is + * consumed as a pre-serialized static object (src/event-format.schema.ts), + * generated at build time. If zod ever leaks in (e.g. an accidental runtime + * import of @walkeros/core/dev or partialEventJsonSchema), this trips. 
+ */ +const ZOD_PATTERN = /\bzod\b|z\.object|ZodType|ZodObject|_zod\b/; + +function readJson(relPath: string): { dependencies?: Record } { + const raw = fs.readFileSync(path.join(repoRoot, relPath), 'utf8'); + return JSON.parse(raw) as { dependencies?: Record }; +} + +/** Recursively collects .ts/.tsx file contents under a directory. */ +function readSourceTree(absDir: string, skipDir?: string): string { + let combined = ''; + for (const entry of fs.readdirSync(absDir, { withFileTypes: true })) { + if (entry.isDirectory() && entry.name === skipDir) continue; + const full = path.join(absDir, entry.name); + if (entry.isDirectory()) { + combined += readSourceTree(full, skipDir); + } else if (/\.tsx?$/.test(entry.name)) { + combined += fs.readFileSync(full, 'utf8'); + } + } + return combined; +} + +/** + * Bundles a package's own TypeScript SOURCE entry by absolute path. All + * `@walkeros/*` workspace deps are marked external so esbuild never resolves + * them: this keeps the test hermetic (it does NOT depend on any built `dist/`, + * which does not exist in CI before the `test` task runs). The only runtime + * non-`@walkeros` dep that matters here is `@cfworker/json-schema`, a real + * `node_modules` package and a direct dependency of validate, which stays + * bundled. Local relative imports (e.g. `./event-format.schema`) also stay. + */ +async function bundleSource(entryPath: string): Promise { + const result = await build({ + entryPoints: [entryPath], + bundle: true, + treeShaking: true, + platform: 'neutral', + format: 'esm', + write: false, + external: ['@walkeros/*'], + nodePaths: [nodeModules], + logLevel: 'silent', + }); + const out = result.outputFiles[0]; + if (!out) throw new Error('esbuild produced no output'); + return out.text; +} + +/** Bundles a TS snippet in os.tmpdir(), resolving deps via the repo node_modules. 
*/ +async function bundleSnippet(contents: string): Promise { + const result = await build({ + stdin: { contents, resolveDir: os.tmpdir(), loader: 'ts' }, + bundle: true, + treeShaking: true, + platform: 'neutral', + format: 'esm', + write: false, + external: ['@walkeros/*'], + nodePaths: [nodeModules], + logLevel: 'silent', + }); + const out = result.outputFiles[0]; + if (!out) throw new Error('esbuild produced no output'); + return out.text; +} + +const VALIDATE_SRC_ENTRY = path.resolve( + repoRoot, + 'packages/transformers/validate/src/index.ts', +); +const FINGERPRINT_SRC_ENTRY = path.resolve( + repoRoot, + 'packages/server/transformers/fingerprint/src/index.ts', +); + +const gzipBytes = (source: string): number => + zlib.gzipSync(Buffer.from(source, 'utf8')).length; + +describe('bundle isolation: @cfworker/json-schema stays in the validate transformer', () => { + describe('negative guards (core/collector must not depend on cfworker)', () => { + test('@cfworker is not a dependency of @walkeros/core', () => { + const deps = readJson('packages/core/package.json').dependencies ?? {}; + expect(Object.keys(deps)).not.toContain('@cfworker/json-schema'); + }); + + test('@cfworker is not a dependency of @walkeros/collector', () => { + const deps = + readJson('packages/collector/package.json').dependencies ?? 
{}; + expect(Object.keys(deps)).not.toContain('@cfworker/json-schema'); + }); + + test('@cfworker is not imported anywhere in core or collector source', () => { + const coreSrc = readSourceTree(path.join(repoRoot, 'packages/core/src')); + const collectorSrc = readSourceTree( + path.join(repoRoot, 'packages/collector/src'), + ); + expect(coreSrc).not.toContain('@cfworker'); + expect(collectorSrc).not.toContain('@cfworker'); + }); + }); + + describe('positive isolation (tree-shaking keeps the engine scoped)', () => { + test('fingerprint-only bundle excludes the cfworker engine', async () => { + const bundle = await bundleSource(FINGERPRINT_SRC_ENTRY); + expect(CFWORKER_PATTERN.test(bundle)).toBe(false); + expect(ZOD_PATTERN.test(bundle)).toBe(false); + }); + + test('validate bundle includes the cfworker engine and stays under 20KB gzipped', async () => { + const validateBundle = await bundleSource(VALIDATE_SRC_ENTRY); + expect(CFWORKER_PATTERN.test(validateBundle)).toBe(true); + // The runtime validate bundle must contain ZERO zod tokens: the canonical + // event schema is the pre-serialized static event-format.schema.ts. + expect(ZOD_PATTERN.test(validateBundle)).toBe(false); + + // Quantify the engine's own gzipped footprint in isolation. + const cfworkerOnly = await bundleSnippet( + "export * from '@cfworker/json-schema';\n", + ); + const cfworkerGzip = gzipBytes(cfworkerOnly); + + // eslint-disable-next-line no-console + console.log( + `cfworker engine gzipped: ${cfworkerGzip} bytes; ` + + `full validate bundle gzipped: ${gzipBytes(validateBundle)} bytes`, + ); + + expect(cfworkerGzip).toBeLessThan(20 * 1024); + }); + }); +}); + +describe('web-safety: no Node-only APIs in shipped source', () => { + // Scan the package source (excluding tests, which legitimately use node:fs + // and esbuild) to enforce the web+server agnostic guarantee. 
+ const shippedSrc = readSourceTree(path.resolve(__dirname, '..'), '__tests__'); + + test('no node: protocol imports', () => { + expect(shippedSrc).not.toMatch(/from\s+['"]node:/); + expect(shippedSrc).not.toMatch(/require\(\s*['"]node:/); + }); + + test('no bare fs/path/os imports', () => { + expect(shippedSrc).not.toMatch(/from\s+['"](fs|path|os)['"]/); + expect(shippedSrc).not.toMatch(/require\(\s*['"](fs|path|os)['"]\s*\)/); + }); + + test('no Buffer usage', () => { + expect(shippedSrc).not.toMatch(/\bBuffer\b/); + }); + + test('no process. usage', () => { + expect(shippedSrc).not.toMatch(/\bprocess\./); + }); +}); diff --git a/packages/transformers/validate/src/__tests__/format-schema-sync.test.ts b/packages/transformers/validate/src/__tests__/format-schema-sync.test.ts new file mode 100644 index 000000000..d375d149d --- /dev/null +++ b/packages/transformers/validate/src/__tests__/format-schema-sync.test.ts @@ -0,0 +1,26 @@ +import { schemas } from '@walkeros/core/dev'; +import { eventFormatSchema } from '../event-format.schema'; + +/** + * Drift guard. The committed eventFormatSchema is generated from the canonical + * partialEventJsonSchema (zod EventSchema) at BUILD time. This test imports zod + * via @walkeros/core/dev, but it is a TEST and never bundled into the runtime. + * + * If this fails, the canonical event schema changed; regenerate the artifact: + * npm run generate:format-schema + */ +describe('event-format.schema drift guard', () => { + it('matches the canonical partialEventJsonSchema', () => { + try { + expect(eventFormatSchema).toEqual(schemas.partialEventJsonSchema); + } catch (error) { + const detail = error instanceof Error ? `\n\n${error.message}` : ''; + throw new Error( + 'event-format.schema is out of sync with the canonical ' + + 'partialEventJsonSchema. 
Regenerate it: ' + + 'npm run generate:format-schema' + + detail, + ); + } + }); +}); diff --git a/packages/transformers/validate/src/__tests__/scaffold.test.ts b/packages/transformers/validate/src/__tests__/scaffold.test.ts new file mode 100644 index 000000000..0344090fc --- /dev/null +++ b/packages/transformers/validate/src/__tests__/scaffold.test.ts @@ -0,0 +1,7 @@ +import { transformerValidate } from '../index'; + +describe('transformerValidate scaffold', () => { + test('is exported as a function', () => { + expect(typeof transformerValidate).toBe('function'); + }); +}); diff --git a/packages/transformers/validate/src/__tests__/settings.schema.test.ts b/packages/transformers/validate/src/__tests__/settings.schema.test.ts new file mode 100644 index 000000000..cbacaf4d6 --- /dev/null +++ b/packages/transformers/validate/src/__tests__/settings.schema.test.ts @@ -0,0 +1,38 @@ +import { zodToSchema } from '@walkeros/core/dev'; +import { SettingsSchema } from '../schemas/settings'; + +describe('validate SettingsSchema', () => { + it('parses a full valid settings object', () => { + const result = SettingsSchema.safeParse({ + contract: [{ events: { page: { view: { type: 'object' } } } }], + format: true, + mode: 'strict', + output: { isValid: 'source.valid', errors: 'validation' }, + }); + expect(result.success).toBe(true); + }); + + it('rejects an unknown mode value', () => { + const result = SettingsSchema.safeParse({ mode: 'drop' }); + expect(result.success).toBe(false); + }); + + it('parses an empty object (all fields optional)', () => { + const result = SettingsSchema.safeParse({}); + expect(result.success).toBe(true); + }); + + it('exposes the four top-level keys in the generated JSON Schema', () => { + const jsonSchema = zodToSchema(SettingsSchema); + const { properties } = jsonSchema; + if (typeof properties !== 'object' || properties === null) { + throw new Error('expected generated schema to have an object properties'); + } + 
expect(Object.keys(properties).sort()).toEqual([ + 'contract', + 'format', + 'mode', + 'output', + ]); + }); +}); diff --git a/packages/transformers/validate/src/__tests__/stepExamples.test.ts b/packages/transformers/validate/src/__tests__/stepExamples.test.ts new file mode 100644 index 000000000..d7f6854b3 --- /dev/null +++ b/packages/transformers/validate/src/__tests__/stepExamples.test.ts @@ -0,0 +1,84 @@ +import type { Transformer, WalkerOS } from '@walkeros/core'; +import { createIngest, createMockContext } from '@walkeros/core'; +import { transformerValidate } from '../transformer'; +import type { ValidateSettings } from '../types'; +import * as step from '../examples/step'; + +type Types = Transformer.Types; + +/** Requires data.title (string) on a "page view" event. */ +const pageContract = { + events: { + page: { + view: { + type: 'object', + required: ['name', 'data'], + properties: { + name: { type: 'string', const: 'page view' }, + data: { + type: 'object', + required: ['title'], + properties: { title: { type: 'string' } }, + }, + }, + }, + }, + }, +}; + +/** Rejects any event whose name starts with "gtm.". */ +const noGtmContract = { + type: 'object', + properties: { name: { not: { pattern: '^gtm\\.' } } }, +}; + +/** + * Per-example transformer settings. StepExample carries no config slot, so the + * harness keys off the example name (same shape the bot transformer test uses + * for its per-example User-Agent headers). 
+ */ +const settingsByExample: Record = { + strictValidPageView: { contract: [pageContract], mode: 'strict' }, + strictInvalidPageView: { contract: [pageContract], mode: 'strict' }, + passAnnotateInvalid: { contract: [pageContract], mode: 'pass' }, + gtmFilterDropped: { contract: [noGtmContract], mode: 'strict' }, + gtmFilterPasses: { contract: [noGtmContract], mode: 'strict' }, +}; + +describe('validate transformer step examples', () => { + const cases = Object.entries(step) as Array< + [string, (typeof step)[keyof typeof step]] + >; + + it.each(cases)('%s', async (name, example) => { + const settings = settingsByExample[name]; + if (!settings) { + throw new Error(`no settings registered for example "${name}"`); + } + + const instance = await transformerValidate( + createMockContext({ id: 'validate', config: { settings } }), + ); + const ctx = createMockContext({ + id: 'validate', + ingest: createIngest('test'), + }); + const result = await instance.push( + example.in as WalkerOS.DeepPartialEvent, + ctx, + ); + + // A dropped example is encoded as out: [['return', false]]. + if (result === false) { + expect(example.out).toEqual([['return', false]]); + return; + } + if (!result || Array.isArray(result)) { + throw new Error(`expected a single Result for "${name}"`); + } + + // Wrap to match Flow.StepExample.out shape: [['return', { event: {...} }]]. 
+ const actual = [['return', { event: result.event }]]; + expect(actual).toEqual(example.out); + }); +}); diff --git a/packages/transformers/validate/src/__tests__/transformer.test.ts b/packages/transformers/validate/src/__tests__/transformer.test.ts new file mode 100644 index 000000000..52ce6a907 --- /dev/null +++ b/packages/transformers/validate/src/__tests__/transformer.test.ts @@ -0,0 +1,186 @@ +import type { Ingest, Transformer, WalkerOS } from '@walkeros/core'; +import { createIngest, createMockContext, getByPath } from '@walkeros/core'; +import { transformerValidate } from '../transformer'; +import type { ValidateSettings } from '../types'; + +/** Reads a dot-path off ingest and returns it as an array if it is one. */ +const issuesAt = (ingest: Ingest, path: string): unknown[] => { + const value = getByPath(ingest, path); + return Array.isArray(value) ? value : []; +}; + +type Types = Transformer.Types; + +const createInitContext = (config: Partial> = {}) => + createMockContext({ config, id: 'validate' }); + +const createPushContext = () => + createMockContext({ + id: 'validate', + ingest: createIngest('test'), + }); + +// Requires data.title (string); event must be name "page view". 
+const pageContract = { + events: { + page: { + view: { + type: 'object', + required: ['name', 'data'], + properties: { + name: { type: 'string', const: 'page view' }, + data: { + type: 'object', + required: ['title'], + properties: { title: { type: 'string' } }, + }, + }, + }, + }, + }, +}; + +const validEvent: WalkerOS.DeepPartialEvent = { + name: 'page view', + entity: 'page', + action: 'view', + data: { title: 'Home' }, +}; + +const invalidEvent: WalkerOS.DeepPartialEvent = { + name: 'page view', + entity: 'page', + action: 'view', + data: {}, +}; + +describe('transformerValidate', () => { + test('type property is "validate"', async () => { + const instance = await transformerValidate(createInitContext({})); + expect(instance.type).toBe('validate'); + }); + + test('valid event passes and writes source.valid = true', async () => { + const instance = await transformerValidate( + createInitContext({ settings: { contract: [pageContract] } }), + ); + const result = await instance.push(validEvent, createPushContext()); + expect(result).not.toBe(false); + expect(result).toMatchObject({ event: { source: { valid: true } } }); + }); + + test('invalid event in strict mode drops (returns false) but still records ingest errors', async () => { + const instance = await transformerValidate( + createInitContext({ + settings: { contract: [pageContract], mode: 'strict' }, + }), + ); + const ctx = createPushContext(); + const result = await instance.push(invalidEvent, ctx); + expect(result).toBe(false); + expect(issuesAt(ctx.ingest, 'validation').length).toBeGreaterThan(0); + }); + + test('invalid event in pass mode continues and annotates source.valid = false', async () => { + const instance = await transformerValidate( + createInitContext({ + settings: { contract: [pageContract], mode: 'pass' }, + }), + ); + const ctx = createPushContext(); + const result = await instance.push(invalidEvent, ctx); + expect(result).not.toBe(false); + expect(result).toMatchObject({ event: { source: 
{ valid: false } } }); + expect(issuesAt(ctx.ingest, 'validation').length).toBeGreaterThan(0); + }); + + test('pass mode is the default (no mode set continues on invalid)', async () => { + const instance = await transformerValidate( + createInitContext({ settings: { contract: [pageContract] } }), + ); + const result = await instance.push(invalidEvent, createPushContext()); + expect(result).not.toBe(false); + expect(result).toMatchObject({ event: { source: { valid: false } } }); + }); + + test('custom output split: verdict at event.data.ok, errors at ingest.diag.problems', async () => { + const instance = await transformerValidate( + createInitContext({ + settings: { + contract: [pageContract], + output: { isValid: 'data.ok', errors: 'diag.problems' }, + }, + }), + ); + const ctx = createPushContext(); + const result = await instance.push(invalidEvent, ctx); + expect(result).toMatchObject({ event: { data: { ok: false } } }); + // default paths must NOT be written + expect(result).not.toMatchObject({ event: { source: { valid: false } } }); + expect(getByPath(ctx.ingest, 'validation')).toBeUndefined(); + expect(issuesAt(ctx.ingest, 'diag.problems').length).toBeGreaterThan(0); + }); + + test('empty-string output paths skip both writes', async () => { + const instance = await transformerValidate( + createInitContext({ + settings: { + contract: [pageContract], + output: { isValid: '', errors: '' }, + }, + }), + ); + const ctx = createPushContext(); + const result = await instance.push(validEvent, ctx); + expect(result).not.toBe(false); + // verdict skipped: no source.valid on the event + expect(result).not.toMatchObject({ event: { source: expect.anything() } }); + // errors skipped: no validation list on the ingest + expect(getByPath(ctx.ingest, 'validation')).toBeUndefined(); + }); + + test('gtm filter via inline contract: strict drops gtm.* names, passes real events', async () => { + const inlineContract = { + type: 'object', + properties: { name: { not: { pattern: 
'^gtm\\.' } } }, + }; + const instance = await transformerValidate( + createInitContext({ + settings: { contract: [inlineContract], mode: 'strict' }, + }), + ); + + const gtmJs: WalkerOS.DeepPartialEvent = { name: 'gtm.js' }; + const gtmDom: WalkerOS.DeepPartialEvent = { name: 'gtm.dom' }; + const pageView: WalkerOS.DeepPartialEvent = { + name: 'page view', + entity: 'page', + action: 'view', + }; + + expect(await instance.push(gtmJs, createPushContext())).toBe(false); + expect(await instance.push(gtmDom, createPushContext())).toBe(false); + const ok = await instance.push(pageView, createPushContext()); + expect(ok).not.toBe(false); + expect(ok).toMatchObject({ event: { source: { valid: true } } }); + }); + + test('format-only malformed event in pass mode annotates invalid', async () => { + const instance = await transformerValidate( + createInitContext({ settings: { format: true, mode: 'pass' } }), + ); + // Canonical structural schema: a non-positive timestamp (must be an + // integer > 0) fails. Missing fields would pass (all fields optional). 
+ const malformed: WalkerOS.DeepPartialEvent = { + name: 'page view', + entity: 'page', + action: 'view', + timestamp: -1, + }; + const ctx = createPushContext(); + const result = await instance.push(malformed, ctx); + expect(result).not.toBe(false); + expect(result).toMatchObject({ event: { source: { valid: false } } }); + expect(issuesAt(ctx.ingest, 'validation').length).toBeGreaterThan(0); + }); +}); diff --git a/packages/transformers/validate/src/__tests__/validate.test.ts b/packages/transformers/validate/src/__tests__/validate.test.ts new file mode 100644 index 000000000..8cb5d884a --- /dev/null +++ b/packages/transformers/validate/src/__tests__/validate.test.ts @@ -0,0 +1,247 @@ +import type { WalkerOS } from '@walkeros/core'; +import { getValidator, validateEventAgainstContract } from '../validate'; +import type { ContractSource } from '../types'; + +const pageViewSchema = { + type: 'object', + properties: { + name: { type: 'string', const: 'page view' }, + data: { + type: 'object', + required: ['title', 'path'], + properties: { title: { type: 'string' }, path: { type: 'string' } }, + }, + }, + required: ['name', 'data'], +}; + +const contractRule = ( + events: Record>>, +): ContractSource => ({ events }); + +describe('validateEventAgainstContract', () => { + it('valid event against a referenced schema returns isValid:true, no errors', () => { + const event: WalkerOS.DeepPartialEvent = { + name: 'page view', + entity: 'page', + action: 'view', + data: { title: 'Home', path: '/' }, + }; + const result = validateEventAgainstContract(event, undefined, { + contracts: [contractRule({ page: { view: pageViewSchema } })], + }); + expect(result).toEqual({ isValid: true, errors: [] }); + }); + + it('invalid event with two missing top-level required fields reports two errors', () => { + // Flat top-level required so the error count is the two missing fields, + // with no nested `properties` wrapper unit from @cfworker. 
+ const topLevelRequired = { + type: 'object', + required: ['name', 'data'], + properties: { name: { type: 'string' } }, + }; + const event: WalkerOS.DeepPartialEvent = { + entity: 'page', + action: 'view', + }; + const result = validateEventAgainstContract(event, undefined, { + contracts: [contractRule({ page: { view: topLevelRequired } })], + }); + expect(result.isValid).toBe(false); + expect(result.errors.length).toBe(2); + result.errors.forEach((issue) => { + expect(typeof issue.path).toBe('string'); + expect(typeof issue.message).toBe('string'); + expect(issue.level).toBe('error'); + }); + }); + + it('selects a wildcard *.* schema when no specific match exists', () => { + const event: WalkerOS.DeepPartialEvent = { + name: 'order complete', + entity: 'order', + action: 'complete', + }; + const result = validateEventAgainstContract(event, undefined, { + contracts: [ + contractRule({ + '*': { '*': { type: 'object', required: ['data'] } }, + }), + ], + }); + expect(result.isValid).toBe(false); + expect(result.errors.length).toBe(1); + }); + + it('prefers entity.action over entity.*', () => { + const event: WalkerOS.DeepPartialEvent = { + name: 'page view', + entity: 'page', + action: 'view', + data: { title: 'Home', path: '/' }, + }; + const result = validateEventAgainstContract(event, undefined, { + contracts: [ + contractRule({ + page: { + '*': { type: 'object', required: ['nonexistent'] }, + view: pageViewSchema, + }, + }), + ], + }); + // entity.action (view) passes; entity.* (requires nonexistent) is NOT applied + expect(result).toEqual({ isValid: true, errors: [] }); + }); + + it('applies an inline whole-event JSON Schema (no events key)', () => { + const event: WalkerOS.DeepPartialEvent = { + name: 'page view', + entity: 'page', + action: 'view', + }; + const inline: ContractSource = { + type: 'object', + properties: { name: { const: 'order complete' } }, + }; + const result = validateEventAgainstContract(event, undefined, { + contracts: [inline], + }); + 
expect(result.isValid).toBe(false); + expect(result.errors.length).toBeGreaterThanOrEqual(1); + }); + + it('format:true passes a well-formed event', () => { + const event: WalkerOS.DeepPartialEvent = { + name: 'page view', + entity: 'page', + action: 'view', + data: { title: 'Home' }, + }; + const result = validateEventAgainstContract(event, undefined, { + format: true, + }); + expect(result).toEqual({ isValid: true, errors: [] }); + }); + + it('format:true allows a missing entity (canonical partial: all fields optional)', () => { + // format:true now validates the canonical partialEventJsonSchema, where + // every field is optional. Missing fields pass; format checks STRUCTURE + // and TYPES, not presence. This is the documented widening over the older + // name/entity/action-required check. + const event: WalkerOS.DeepPartialEvent = { + name: 'page view', + action: 'view', + }; + const result = validateEventAgainstContract(event, undefined, { + format: true, + }); + expect(result).toEqual({ isValid: true, errors: [] }); + }); + + it('format:true passes a full well-formed event', () => { + const event: WalkerOS.DeepPartialEvent = { + name: 'order complete', + entity: 'order', + action: 'complete', + data: { id: 'O-1', total: 42.5 }, + context: { stage: ['checkout', 0] }, + timestamp: Date.now(), + }; + const result = validateEventAgainstContract(event, undefined, { + format: true, + }); + expect(result).toEqual({ isValid: true, errors: [] }); + }); + + it('format:true rejects an empty event id', () => { + // The canonical structural schema (a documented superset of the old + // name/entity/action-only check) requires id to be a non-empty string + // (minLength:1). An empty id is a valid DeepPartialEvent at compile time + // but fails the canonical schema. 
+ const event: WalkerOS.DeepPartialEvent = { + name: 'page view', + entity: 'page', + action: 'view', + id: '', + }; + const result = validateEventAgainstContract(event, undefined, { + format: true, + }); + expect(result.isValid).toBe(false); + expect(result.errors.length).toBeGreaterThanOrEqual(1); + }); + + it('format:true rejects a non-positive timestamp', () => { + // timestamp must be an integer > 0 (exclusiveMinimum:0). A negative value + // is a valid DeepPartialEvent (number) but fails the canonical schema, + // proving format:true checks the full structure, not just name/entity/action. + const event: WalkerOS.DeepPartialEvent = { + name: 'page view', + entity: 'page', + action: 'view', + timestamp: -1, + }; + const result = validateEventAgainstContract(event, undefined, { + format: true, + }); + expect(result.isValid).toBe(false); + expect(result.errors.length).toBeGreaterThanOrEqual(1); + }); + + it('ANDs across two contract sources and aggregates errors', () => { + const topLevelRequired = { + type: 'object', + required: ['name', 'data'], + properties: { name: { type: 'string' } }, + }; + const event: WalkerOS.DeepPartialEvent = { + entity: 'page', + action: 'view', + }; + const inline: ContractSource = { + type: 'object', + properties: { entity: { const: 'order' } }, + }; + const result = validateEventAgainstContract(event, undefined, { + contracts: [contractRule({ page: { view: topLevelRequired } }), inline], + }); + expect(result.isValid).toBe(false); + // Aggregated across both sources: 2 missing top-level required from the + // rule + 2 from the inline const mismatch (@cfworker emits a parent + // `properties` wrapper unit alongside the leaf `const` unit) = 4. 
+ expect(result.errors.length).toBe(4); + }); + + it('returns isValid:true when no contract entry matches (no constraint)', () => { + const event: WalkerOS.DeepPartialEvent = { + name: 'order complete', + entity: 'order', + action: 'complete', + }; + const result = validateEventAgainstContract(event, undefined, { + contracts: [contractRule({ page: { view: pageViewSchema } })], + }); + expect(result).toEqual({ isValid: true, errors: [] }); + }); + + it('reuses a cached Validator for an equal (re-cloned) schema', () => { + // @cfworker's Validator export is a non-configurable getter, so a + // constructor spy is impossible without casts. Assert the cache contract + // directly: two structurally equal but distinct schema objects (as happens + // when a flow re-clones its config) resolve to the same Validator instance. + const schemaA = { + type: 'object', + required: ['name'], + properties: { name: { type: 'string' } }, + }; + const schemaB = { + type: 'object', + required: ['name'], + properties: { name: { type: 'string' } }, + }; + expect(schemaA).not.toBe(schemaB); + expect(getValidator(schemaA)).toBe(getValidator(schemaB)); + }); +}); diff --git a/packages/transformers/validate/src/dev.ts b/packages/transformers/validate/src/dev.ts new file mode 100644 index 000000000..7f89cb460 --- /dev/null +++ b/packages/transformers/validate/src/dev.ts @@ -0,0 +1,4 @@ +export * as schemas from './schemas'; +export * as examples from './examples'; +export { hints } from './hints'; +export { validateEventAgainstContract } from './validate'; diff --git a/packages/transformers/validate/src/event-format.schema.ts b/packages/transformers/validate/src/event-format.schema.ts new file mode 100644 index 000000000..48bb4f013 --- /dev/null +++ b/packages/transformers/validate/src/event-format.schema.ts @@ -0,0 +1,664 @@ +// GENERATED by scripts/generate-format-schema.mjs: DO NOT EDIT. +// Source: @walkeros/core partialEventJsonSchema (canonical zod EventSchema). 
+// `format: true` validates that the value is a valid WalkerOS.PartialEvent: +// the canonical event structure with all fields optional, so it checks shape +// and field types, not presence. Required-field enforcement is the contract +// arm's job (events/schema with required). +// Regenerate: npm run generate:format-schema + +export const eventFormatSchema = Object.freeze({ + $schema: 'http://json-schema.org/draft-07/schema#', + description: 'Partial event structure with all fields optional', + allOf: [ + { + $ref: '#/definitions/WalkerOSPartialEvent', + }, + ], + definitions: { + WalkerOSProperty: { + title: 'WalkerOS.Property', + description: + 'PropertyType or an array of PropertyType. Recursive structure for nested objects and arrays.', + anyOf: [ + { + $ref: '#/definitions/WalkerOSPropertyType', + }, + { + type: 'array', + items: { + $ref: '#/definitions/WalkerOSPropertyType', + }, + }, + ], + }, + WalkerOSPropertyType: { + title: 'WalkerOS.PropertyType', + description: + 'Base property value types (boolean, string, number, or nested Property record).', + anyOf: [ + { + type: 'boolean', + }, + { + type: 'string', + }, + { + type: 'number', + }, + { + type: 'object', + propertyNames: { + type: 'string', + }, + additionalProperties: { + $ref: '#/definitions/WalkerOSProperty', + }, + }, + ], + }, + WalkerOSProperties: { + type: 'object', + propertyNames: { + type: 'string', + }, + additionalProperties: { + allOf: [ + { + $ref: '#/definitions/WalkerOSProperty', + }, + ], + }, + title: 'WalkerOS.Properties', + description: 'Flexible property collection with optional values.', + }, + WalkerOSOrderedProperties: { + type: 'object', + propertyNames: { + type: 'string', + }, + additionalProperties: { + type: 'array', + items: [ + { + $ref: '#/definitions/WalkerOSProperty', + }, + { + type: 'number', + }, + ], + }, + title: 'WalkerOS.OrderedProperties', + description: + 'Ordered properties with [value, order] tuples for priority control.', + }, + WalkerOSUser: { + 
allOf: [ + { + description: 'Flexible property collection with optional values', + allOf: [ + { + $ref: '#/definitions/WalkerOSProperties', + }, + ], + }, + { + type: 'object', + properties: { + id: { + description: 'User identifier', + type: 'string', + }, + device: { + description: 'Device identifier', + type: 'string', + }, + session: { + description: 'Session identifier', + type: 'string', + }, + hash: { + description: 'Hashed identifier', + type: 'string', + }, + address: { + description: 'User address', + type: 'string', + }, + email: { + description: 'User email address', + type: 'string', + format: 'email', + pattern: + "^(?!\\.)(?!.*\\.\\.)([A-Za-z0-9_'+\\-\\.]*)[A-Za-z0-9_+-]@([A-Za-z0-9][A-Za-z0-9\\-]*\\.)+[A-Za-z]{2,}$", + }, + phone: { + description: 'User phone number', + type: 'string', + }, + userAgent: { + description: 'Browser user agent string', + type: 'string', + }, + browser: { + description: 'Browser name', + type: 'string', + }, + browserVersion: { + description: 'Browser version', + type: 'string', + }, + deviceType: { + description: 'Device type (mobile, desktop, tablet)', + type: 'string', + }, + os: { + description: 'Operating system', + type: 'string', + }, + osVersion: { + description: 'Operating system version', + type: 'string', + }, + screenSize: { + description: 'Screen dimensions', + type: 'string', + }, + language: { + description: 'User language', + type: 'string', + }, + country: { + description: 'User country', + type: 'string', + }, + region: { + description: 'User region/state', + type: 'string', + }, + city: { + description: 'User city', + type: 'string', + }, + zip: { + description: 'User postal code', + type: 'string', + }, + timezone: { + description: 'User timezone', + type: 'string', + }, + ip: { + description: 'User IP address', + type: 'string', + }, + internal: { + description: 'Internal user flag (employee, test user)', + type: 'boolean', + }, + }, + additionalProperties: false, + }, + ], + title: 'WalkerOS.User', 
+ description: 'User identification and attributes.', + }, + __schema0: { + description: 'Nested entity structure with recursive nesting support', + $ref: '#/definitions/WalkerOSEntity', + }, + WalkerOSEntity: { + title: 'WalkerOS.Entity', + description: 'Nested entity structure with recursive nesting support.', + type: 'object', + properties: { + entity: { + type: 'string', + description: 'Entity name', + }, + data: { + type: 'object', + propertyNames: { + type: 'string', + }, + additionalProperties: { + allOf: [ + { + $ref: '#/definitions/WalkerOSProperty', + }, + ], + }, + title: 'WalkerOS.Properties', + description: 'Entity-specific properties', + allOf: [ + { + $ref: '#/definitions/WalkerOSProperties', + }, + ], + }, + nested: { + description: 'Nested child entities', + type: 'array', + items: { + $ref: '#/definitions/__schema0', + }, + }, + context: { + description: 'Entity context data', + allOf: [ + { + $ref: '#/definitions/WalkerOSOrderedProperties', + }, + ], + }, + }, + required: ['entity', 'data'], + additionalProperties: false, + }, + WalkerOSEntities: { + type: 'array', + items: { + $ref: '#/definitions/__schema0', + }, + title: 'WalkerOS.Entities', + description: 'Array of nested entities.', + }, + WalkerOSConsent: { + type: 'object', + propertyNames: { + type: 'string', + }, + additionalProperties: { + type: 'boolean', + }, + title: 'WalkerOS.Consent', + description: + 'Consent state mapping. Keys are consent groups (e.g. 
marketing, functional), values are booleans for granted/denied.', + }, + WalkerOSSource: { + allOf: [ + { + description: 'Flexible property collection with optional values', + allOf: [ + { + $ref: '#/definitions/WalkerOSProperties', + }, + ], + }, + { + type: 'object', + properties: { + type: { + type: 'string', + description: 'Source kind (browser, dataLayer, gtag, ...)', + }, + platform: { + description: + 'Runtime platform (web, server, app, ios, android, terminal, ...)', + type: 'string', + }, + version: { + description: 'Deployment version of the source emitter', + type: 'string', + }, + schema: { + description: + 'Event model spec version (collector defaults to "4")', + type: 'string', + }, + count: { + description: 'Emission sequence per run', + type: 'integer', + minimum: 0, + maximum: 9007199254740991, + }, + trace: { + description: 'W3C traceparent full string', + type: 'string', + }, + url: { + type: 'string', + }, + referrer: { + type: 'string', + }, + tool: { + type: 'string', + }, + command: { + type: 'string', + }, + }, + required: ['type'], + additionalProperties: false, + }, + ], + title: 'WalkerOS.Source', + description: 'Event source information (origin of the event).', + }, + WalkerOSPartialEvent: { + type: 'object', + properties: { + name: { + type: 'string', + description: + 'Event name in "entity action" format (e.g., "page view", "product add")', + }, + data: { + type: 'object', + propertyNames: { + type: 'string', + }, + additionalProperties: { + allOf: [ + { + $ref: '#/definitions/WalkerOSProperty', + }, + ], + }, + title: 'WalkerOS.Properties', + description: 'Event-specific properties', + allOf: [ + { + $ref: '#/definitions/WalkerOSProperties', + }, + ], + }, + context: { + type: 'object', + propertyNames: { + type: 'string', + }, + additionalProperties: { + type: 'array', + items: [ + { + $ref: '#/definitions/WalkerOSProperty', + }, + { + type: 'number', + }, + ], + }, + title: 'WalkerOS.OrderedProperties', + description: 'Ordered 
context properties with priorities', + allOf: [ + { + $ref: '#/definitions/WalkerOSOrderedProperties', + }, + ], + }, + globals: { + type: 'object', + propertyNames: { + type: 'string', + }, + additionalProperties: { + allOf: [ + { + $ref: '#/definitions/WalkerOSProperty', + }, + ], + }, + title: 'WalkerOS.Properties', + description: 'Global properties shared across events', + allOf: [ + { + $ref: '#/definitions/WalkerOSProperties', + }, + ], + }, + custom: { + type: 'object', + propertyNames: { + type: 'string', + }, + additionalProperties: { + allOf: [ + { + $ref: '#/definitions/WalkerOSProperty', + }, + ], + }, + title: 'WalkerOS.Properties', + description: 'Custom implementation-specific properties', + allOf: [ + { + $ref: '#/definitions/WalkerOSProperties', + }, + ], + }, + user: { + allOf: [ + { + description: 'Flexible property collection with optional values', + allOf: [ + { + $ref: '#/definitions/WalkerOSProperties', + }, + ], + }, + { + type: 'object', + properties: { + id: { + description: 'User identifier', + type: 'string', + }, + device: { + description: 'Device identifier', + type: 'string', + }, + session: { + description: 'Session identifier', + type: 'string', + }, + hash: { + description: 'Hashed identifier', + type: 'string', + }, + address: { + description: 'User address', + type: 'string', + }, + email: { + description: 'User email address', + type: 'string', + format: 'email', + pattern: + "^(?!\\.)(?!.*\\.\\.)([A-Za-z0-9_'+\\-\\.]*)[A-Za-z0-9_+-]@([A-Za-z0-9][A-Za-z0-9\\-]*\\.)+[A-Za-z]{2,}$", + }, + phone: { + description: 'User phone number', + type: 'string', + }, + userAgent: { + description: 'Browser user agent string', + type: 'string', + }, + browser: { + description: 'Browser name', + type: 'string', + }, + browserVersion: { + description: 'Browser version', + type: 'string', + }, + deviceType: { + description: 'Device type (mobile, desktop, tablet)', + type: 'string', + }, + os: { + description: 'Operating system', + type: 'string', 
+ }, + osVersion: { + description: 'Operating system version', + type: 'string', + }, + screenSize: { + description: 'Screen dimensions', + type: 'string', + }, + language: { + description: 'User language', + type: 'string', + }, + country: { + description: 'User country', + type: 'string', + }, + region: { + description: 'User region/state', + type: 'string', + }, + city: { + description: 'User city', + type: 'string', + }, + zip: { + description: 'User postal code', + type: 'string', + }, + timezone: { + description: 'User timezone', + type: 'string', + }, + ip: { + description: 'User IP address', + type: 'string', + }, + internal: { + description: 'Internal user flag (employee, test user)', + type: 'boolean', + }, + }, + additionalProperties: false, + }, + ], + title: 'WalkerOS.User', + description: 'User identification and attributes', + }, + nested: { + type: 'array', + items: { + $ref: '#/definitions/__schema0', + }, + title: 'WalkerOS.Entities', + description: 'Related nested entities', + allOf: [ + { + $ref: '#/definitions/WalkerOSEntities', + }, + ], + }, + consent: { + type: 'object', + propertyNames: { + type: 'string', + }, + additionalProperties: { + type: 'boolean', + }, + title: 'WalkerOS.Consent', + description: 'Consent states at event time', + allOf: [ + { + $ref: '#/definitions/WalkerOSConsent', + }, + ], + }, + id: { + type: 'string', + minLength: 1, + description: 'W3C span_id, 16 lowercase hex characters', + }, + trigger: { + type: 'string', + description: 'Event trigger identifier', + }, + entity: { + type: 'string', + description: 'Parsed entity from event name', + }, + action: { + type: 'string', + description: 'Parsed action from event name', + }, + timestamp: { + type: 'integer', + exclusiveMinimum: 0, + maximum: 9007199254740991, + description: 'Unix timestamp in milliseconds since epoch', + }, + timing: { + type: 'number', + description: 'Event processing timing information', + }, + source: { + allOf: [ + { + description: 'Flexible 
property collection with optional values', + allOf: [ + { + $ref: '#/definitions/WalkerOSProperties', + }, + ], + }, + { + type: 'object', + properties: { + type: { + type: 'string', + description: 'Source kind (browser, dataLayer, gtag, ...)', + }, + platform: { + description: + 'Runtime platform (web, server, app, ios, android, terminal, ...)', + type: 'string', + }, + version: { + description: 'Deployment version of the source emitter', + type: 'string', + }, + schema: { + description: + 'Event model spec version (collector defaults to "4")', + type: 'string', + }, + count: { + description: 'Emission sequence per run', + type: 'integer', + minimum: 0, + maximum: 9007199254740991, + }, + trace: { + description: 'W3C traceparent full string', + type: 'string', + }, + url: { + type: 'string', + }, + referrer: { + type: 'string', + }, + tool: { + type: 'string', + }, + command: { + type: 'string', + }, + }, + required: ['type'], + additionalProperties: false, + }, + ], + title: 'WalkerOS.Source', + description: 'Event source information', + }, + }, + additionalProperties: false, + title: 'WalkerOS.PartialEvent', + description: 'Partial event structure with all fields optional.', + }, + }, +} as const); diff --git a/packages/transformers/validate/src/examples/index.ts b/packages/transformers/validate/src/examples/index.ts new file mode 100644 index 000000000..ddf288d59 --- /dev/null +++ b/packages/transformers/validate/src/examples/index.ts @@ -0,0 +1 @@ +export * as step from './step'; diff --git a/packages/transformers/validate/src/examples/step.ts b/packages/transformers/validate/src/examples/step.ts new file mode 100644 index 000000000..039656bb9 --- /dev/null +++ b/packages/transformers/validate/src/examples/step.ts @@ -0,0 +1,117 @@ +import type { Flow } from '@walkeros/core'; + +/** + * strict + valid: passes the contract, gets annotated source.valid:true and + * continues down the chain. 
+ */ +export const strictValidPageView: Flow.StepExample = { + title: 'Strict validate against a contract (valid)', + description: + 'A "page view" with the required data.title passes the inline contract. The verdict source.valid:true is written to the event; the chain continues.', + in: { + name: 'page view', + entity: 'page', + action: 'view', + data: { title: 'Home' }, + }, + out: [ + [ + 'return', + { + event: { + name: 'page view', + entity: 'page', + action: 'view', + data: { title: 'Home' }, + source: { valid: true }, + }, + }, + ], + ], +}; + +/** + * strict + invalid: the same contract, but data.title is missing. In strict + * mode the transformer stops the chain (drops). Errors are still written to the + * ingest for observers; the drop itself is encoded as `['return', false]`. + */ +export const strictInvalidPageView: Flow.StepExample = { + public: false, + title: 'Strict validate against a contract (invalid, dropped)', + description: + 'A "page view" missing the required data.title fails the contract. In strict mode the chain stops (event dropped). Validation errors are still written to the ingest for observers.', + in: { + name: 'page view', + entity: 'page', + action: 'view', + data: {}, + }, + out: [['return', false]], +}; + +/** + * pass + invalid: same failure, but mode:'pass' annotates source.valid:false and + * continues. Downstream destinations route on event.source.valid. The error list + * is written to the ingest (default path "validation"), not asserted here. 
+ */ +export const passAnnotateInvalid: Flow.StepExample = { + public: false, + title: 'Pass mode annotates an invalid event', + description: + 'In mode:"pass" an invalid event is not dropped: source.valid:false is written to the event so downstream destinations can route on it, and the error list is written to the ingest path "validation".', + in: { + name: 'page view', + entity: 'page', + action: 'view', + data: {}, + }, + out: [ + [ + 'return', + { + event: { + name: 'page view', + entity: 'page', + action: 'view', + data: {}, + source: { valid: false }, + }, + }, + ], + ], +}; + +/** + * gtm filter via a contract pattern: the contract rejects names matching + * ^gtm\\. There is no `ignore` field; filtering is a contract that fails. In + * strict mode a "gtm.js" event is dropped. + */ +export const gtmFilterDropped: Flow.StepExample = { + public: false, + title: 'Filter gtm.* via a contract pattern (dropped)', + description: + 'A contract whose name must NOT match ^gtm\\. rejects "gtm.js". In strict mode the event is dropped. This is how you filter without an "ignore" field.', + in: { name: 'gtm.js' }, + out: [['return', false]], +}; + +/** Same gtm-filter contract, but a real "page view" passes the pattern. */ +export const gtmFilterPasses: Flow.StepExample = { + title: 'Filter gtm.* via a contract pattern (real event passes)', + description: + 'The same ^gtm\\. 
rejection contract leaves a real "page view" untouched: it passes and is annotated source.valid:true.', + in: { name: 'page view', entity: 'page', action: 'view' }, + out: [ + [ + 'return', + { + event: { + name: 'page view', + entity: 'page', + action: 'view', + source: { valid: true }, + }, + }, + ], + ], +}; diff --git a/packages/transformers/validate/src/hints.ts b/packages/transformers/validate/src/hints.ts new file mode 100644 index 000000000..9eec9ae3f --- /dev/null +++ b/packages/transformers/validate/src/hints.ts @@ -0,0 +1,115 @@ +import type { Hint } from '@walkeros/core'; + +export const hints: Hint.Hints = { + 'contract-resolution': { + text: 'A $contract.* reference is resolved to a concrete JSON Schema at bundle time. The runtime transformer never sees a string: by the time push runs, settings.contract holds resolved ContractRule objects (entity-action event schemas and/or a full-event schema) or inline schemas. If you author a flow with $contract.web, the bundler inlines the matching schema before deploy.', + code: [ + { + lang: 'json', + code: JSON.stringify( + { + transformers: { + validate: { + package: '@walkeros/transformer-validate', + config: { + settings: { + contract: ['$contract.web'], + mode: 'strict', + }, + }, + }, + }, + }, + null, + 2, + ), + }, + ], + }, + 'mode-strict-vs-pass': { + text: 'mode:"strict" drops invalid events by stopping the transformer chain (push returns false), so they never reach downstream destinations. mode:"pass" (the default) never drops: it annotates the event with the verdict and continues, and you route downstream on event.source.valid. 
Use strict to enforce a contract as a hard gate; use pass to observe quality without losing data.', + code: [ + { + lang: 'json', + code: JSON.stringify( + { + transformers: { + validate: { + package: '@walkeros/transformer-validate', + config: { + settings: { contract: ['$contract.web'], mode: 'pass' }, + }, + }, + }, + }, + null, + 2, + ), + }, + ], + }, + 'output-split': { + text: 'The verdict and the error list are written to two different places by design. The boolean verdict goes to the EVENT (default path source.valid): it is analytics-grade data that travels with the event to destinations, and source.valid stays type-clean under WalkerOS.Source. The error list goes to the INGEST (default path validation): it is observer-visible pipeline diagnostics, never analytics data, written in place so it survives even a strict-mode drop. Override either path via output.isValid / output.errors; set either to an empty string to skip that write.', + code: [ + { + lang: 'json', + code: JSON.stringify( + { + transformers: { + validate: { + package: '@walkeros/transformer-validate', + config: { + settings: { + contract: ['$contract.web'], + output: { isValid: 'source.valid', errors: 'validation' }, + }, + }, + }, + }, + }, + null, + 2, + ), + }, + ], + }, + 'format-vs-contract': { + text: 'format:true is a built-in structural check, not an authored schema. It validates the canonical WalkerOS.Event shape (correct field types, no unknown fields). All fields are optional, so it checks structure and types, not presence: a wrong-typed field or malformed structure fails, a missing field does not. It is independent of contract: turn it on to catch malformed events even when you have no contract, or alongside a contract to AND both checks. 
A contract is the place for your domain rules; format is the place for "is this even a well-formed event".', + code: [ + { + lang: 'json', + code: JSON.stringify( + { + transformers: { + validate: { + package: '@walkeros/transformer-validate', + config: { settings: { format: true, mode: 'pass' } }, + }, + }, + }, + null, + 2, + ), + }, + ], + }, + 'gtm-filtering': { + text: 'There is no ignore field. To filter unwanted events (for example GTM lifecycle pings like gtm.js / gtm.dom), author a contract that REJECTS them and run mode:"strict". A schema where name must NOT match ^gtm\\. fails those events, and strict mode drops them while real events pass. This keeps filtering declarative and contract-driven rather than a separate ad-hoc list.', + code: [ + { + lang: 'json', + code: JSON.stringify( + { + type: 'object', + properties: { name: { not: { pattern: '^gtm\\.' } } }, + }, + null, + 2, + ), + }, + ], + }, + 'known-limitations': { + text: 'The errors list may contain an extra parent entry pointing at a properties wrapper per failure, a quirk of the underlying JSON Schema engine output. The isValid verdict is unaffected: a single real failure can surface as more than one error entry, but isValid is still false exactly when there is at least one failure. 
v1 emits level:"error" only (no warn level yet).', + }, +}; diff --git a/packages/transformers/validate/src/index.ts b/packages/transformers/validate/src/index.ts new file mode 100644 index 000000000..1b2599131 --- /dev/null +++ b/packages/transformers/validate/src/index.ts @@ -0,0 +1,10 @@ +export { transformerValidate } from './transformer'; +export { validateEventAgainstContract } from './validate'; +export type { + ValidateSettings, + ContractSource, + ValidateOutput, + ValidationIssue, + ValidateResult, +} from './types'; +export { transformerValidate as default } from './transformer'; diff --git a/packages/transformers/validate/src/schemas/index.ts b/packages/transformers/validate/src/schemas/index.ts new file mode 100644 index 000000000..fd7c7f8c1 --- /dev/null +++ b/packages/transformers/validate/src/schemas/index.ts @@ -0,0 +1,5 @@ +import { zodToSchema } from '@walkeros/core/dev'; +import { SettingsSchema } from './settings'; + +export { SettingsSchema, type Settings } from './settings'; +export const settings = zodToSchema(SettingsSchema); diff --git a/packages/transformers/validate/src/schemas/settings.ts b/packages/transformers/validate/src/schemas/settings.ts new file mode 100644 index 000000000..598f6d158 --- /dev/null +++ b/packages/transformers/validate/src/schemas/settings.ts @@ -0,0 +1,47 @@ +import { z } from '@walkeros/core/dev'; + +export const SettingsSchema = z + .object({ + contract: z + .array(z.record(z.string(), z.unknown())) + .optional() + .describe( + 'Validation constraints. Each entry is a resolved $contract.* rule (with entity-action `events` schemas and/or a full-event `schema`) or an inline whole-event JSON Schema. All entries are AND-ed; every error is aggregated.', + ), + format: z + .boolean() + .optional() + .describe( + 'When true, also validate the canonical WalkerOS.Event structural shape (correct field types, no unknown fields). 
All fields are optional, so this checks structure and types, not presence.', + ), + mode: z + .enum(['strict', 'pass']) + .optional() + .describe( + '`strict` drops invalid events (chain-stop) after recording errors; `pass` annotates and continues. Default `pass`.', + ), + output: z + .object({ + isValid: z + .string() + .optional() + .describe( + 'Event dot-path for the boolean verdict. Default `source.valid`. Empty string = skip.', + ), + errors: z + .string() + .optional() + .describe( + 'Ingest dot-path for the issue list. Default `validation`. Empty string = skip.', + ), + }) + .optional() + .describe( + 'Where the verdict (on the event) and the issue list (on the ingest) are written.', + ), + }) + .describe( + 'Validate transformer: checks events against JSON Schema contracts and annotates a verdict on the event plus issues on the ingest.', + ); + +export type Settings = z.infer; diff --git a/packages/transformers/validate/src/transformer.ts b/packages/transformers/validate/src/transformer.ts new file mode 100644 index 000000000..e0d187c70 --- /dev/null +++ b/packages/transformers/validate/src/transformer.ts @@ -0,0 +1,78 @@ +import type { Transformer } from '@walkeros/core'; +import { setByPath } from '@walkeros/core'; +import { validateEventAgainstContract } from './validate'; +import type { ValidateSettings } from './types'; + +/** + * Mutating dot-path setter for ingest writes. + * + * We can't use @walkeros/core setByPath here: it clones-and-returns (immutable), + * but ingest is the pipeline's mutable scratch context. We need in-place writes + * so subsequent transformers and observers in the chain see the values. 
/** Type guard: true for any non-null, non-array object. */
function isRecord(value: unknown): value is Record<string, unknown> {
  return typeof value === 'object' && value !== null && !Array.isArray(value);
}

/**
 * Mutating dot-path setter for ingest writes.
 *
 * We can't use @walkeros/core setByPath here: it clones-and-returns (immutable),
 * but ingest is the pipeline's mutable scratch context. We need in-place writes
 * so subsequent transformers and observers in the chain see the values.
 *
 * Intermediate segments that are missing, or that hold anything other than a
 * plain object, are replaced with a fresh object before descending.
 *
 * A path containing a `__proto__` segment is ignored: `obj['__proto__']`
 * resolves to the shared `Object.prototype`, so descending into it (or
 * assigning to it) would modify prototypes instead of the ingest itself.
 *
 * @param obj   Object mutated in place.
 * @param path  Dot-separated key path, e.g. `validation` or `debug.validation`.
 * @param value Value stored at the final segment.
 */
function setNestedPath(
  obj: Record<string, unknown>,
  path: string,
  value: unknown,
): void {
  const segments = path.split('.');
  if (segments.includes('__proto__')) return;

  // split() always yields at least one segment; the guard only narrows the type.
  const leaf = segments.pop();
  if (leaf === undefined) return;

  let cur = obj;
  for (const segment of segments) {
    const next = cur[segment];
    if (isRecord(next)) {
      cur = next;
    } else {
      const child: Record<string, unknown> = {};
      cur[segment] = child;
      cur = child;
    }
  }
  cur[leaf] = value;
}

/**
 * Validate transformer.
 *
 * Settings are read once at init (`mode` defaults to `pass`, the verdict path
 * to `source.valid`, the issue path to `validation`). Every push validates the
 * event, writes the issue list onto the ingest, and then either drops the
 * event (`strict` + invalid, by returning `false`) or returns a copy of the
 * event carrying the boolean verdict.
 */
// NOTE(review): the generic arguments below are reconstructed as
// Transformer.Types<ValidateSettings>; confirm against the package types.
export const transformerValidate: Transformer.Init<
  Transformer.Types<ValidateSettings>
> = (context) => {
  const { config } = context;
  const settings: ValidateSettings = config.settings ?? {};

  const mode = settings.mode ?? 'pass';
  const isValidPath = settings.output?.isValid ?? 'source.valid';
  const errorsPath = settings.output?.errors ?? 'validation';

  return {
    // Init's input config type is partial; the instance config type is the
    // full settings type. Same cast pattern the bot/fingerprint transformers use.
    type: 'validate',
    config: config as Transformer.Config<Transformer.Types<ValidateSettings>>,

    async push(event, ctx) {
      const { ingest } = ctx;

      const { isValid, errors } = validateEventAgainstContract(event, ingest, {
        contracts: settings.contract,
        format: settings.format,
      });

      // Issues go to the INGEST (observer-visible diagnostics), written in
      // place so they survive even when a strict drop stops the chain.
      // An empty path means "skip this write".
      if (errorsPath) setNestedPath(ingest, errorsPath, errors);

      // strict + invalid: chain-stop drop. Errors are already on the ingest,
      // so the drop is still diagnosable. Checked before the verdict write so
      // a dropped event is never cloned just to be discarded.
      if (mode === 'strict' && !isValid) return false;

      // Verdict goes to the EVENT (travels to destinations as analytics data).
      // setByPath is immutable, so the returned copy is what we pass on.
      const nextEvent = isValidPath
        ? setByPath(event, isValidPath, isValid)
        : event;

      return { event: nextEvent };
    },
  };
};
/**
 * Module-level Validator cache keyed by the compact JSON of the schema.
 *
 * A plain Map (not WeakMap): schemas are re-cloned per flow load, so identical
 * structures arrive as distinct object references. Keying on serialized content
 * lets repeated pushes within and across loads share one compiled interpreter.
 */
const validatorCache = new Map<string, cfworker.Validator>();

/**
 * Returns the cached validator for a schema, compiling it on first use.
 *
 * @param schema JSON Schema object; its JSON serialization is the cache key.
 * @returns A validator pinned to draft 2020-12.
 */
export function getValidator(
  schema: Record<string, unknown>,
): cfworker.Validator {
  const key = JSON.stringify(schema);
  const cached = validatorCache.get(key);
  if (cached) return cached;
  // cfworker dereferences $ref by annotating each subschema in place
  // (e.g. __absolute_uri__). Parse a fresh mutable copy from the cache key so a
  // frozen input (the generated eventFormatSchema) stays untouched.
  const mutable = JSON.parse(key) as Record<string, unknown>;
  // Pin draft 2020-12 (the contract authoring draft).
  const validator = new cfworker.Validator(mutable, '2020-12');
  validatorCache.set(key, validator);
  return validator;
}

/** True for any non-null, non-array object. */
function isPlainObject(value: unknown): value is Record<string, unknown> {
  return typeof value === 'object' && value !== null && !Array.isArray(value);
}

/**
 * Reads `container[key]` only when `key` is an OWN property of a plain object.
 * Keys originate from event data, so inherited members (`constructor`,
 * `__proto__`, `toString`, ...) must never be mistaken for configured entries.
 */
function ownProperty(container: unknown, key: string): unknown {
  return isPlainObject(container) &&
    Object.prototype.hasOwnProperty.call(container, key)
    ? container[key]
    : undefined;
}

/**
 * Distinguishes a resolved contract rule from an inline whole-event schema.
 *
 * A rule carries entity-action `events` schemas and/or a full-event `schema`.
 * Either marker is sufficient: a rule with only `schema` would otherwise be
 * compiled as the JSON Schema `{ schema: ... }`, whose only keyword is unknown
 * to the engine, so the rule would be silently unenforced.
 */
function isContractRule(source: ContractSource): source is Flow.ContractRule {
  if (!isPlainObject(source)) return false;
  const { events, schema } = source as { events?: unknown; schema?: unknown };
  return isPlainObject(events) || isPlainObject(schema);
}

/**
 * Selects the entity-action schema with the documented wildcard fallback:
 * entity.action → entity.* → *.action → *.*.
 *
 * Only own properties are consulted, and only an object result is returned.
 */
function selectEventSchema(
  events: ValidateEvents,
  entity: string,
  action: string,
): Record<string, unknown> | undefined {
  const forEntity = ownProperty(events, entity);
  const forAnyEntity = ownProperty(events, '*');

  const selected =
    ownProperty(forEntity, action) ??
    ownProperty(forEntity, '*') ??
    ownProperty(forAnyEntity, action) ??
    ownProperty(forAnyEntity, '*');

  return isPlainObject(selected) ? selected : undefined;
}

/**
 * Collects every JSON Schema that applies to this event.
 *
 * Order: the canonical format schema first (when `format` is set), then, per
 * contract entry, the selected entity-action schema followed by the rule's
 * full-event `schema`, or the entry itself when it is an inline schema.
 */
function collectSchemas(
  event: WalkerOS.DeepPartialEvent,
  opts: { contracts?: ContractSource[]; format?: boolean },
): Record<string, unknown>[] {
  const schemas: Record<string, unknown>[] = [];

  if (opts.format) schemas.push(eventFormatSchema);

  // A missing or non-string entity/action becomes '', which can only match
  // through the wildcard entries.
  const entity = typeof event.entity === 'string' ? event.entity : '';
  const action = typeof event.action === 'string' ? event.action : '';

  for (const source of opts.contracts ?? []) {
    if (isContractRule(source)) {
      const selected = selectEventSchema(source.events ?? {}, entity, action);
      if (selected) schemas.push(selected);
      if (source.schema) schemas.push(source.schema);
    } else {
      // Inline whole-event JSON Schema.
      schemas.push(source);
    }
  }

  return schemas;
}

/**
 * The validation verdict authority. Runs the event through every applicable
 * JSON Schema (AND semantics) and aggregates all @cfworker errors.
 *
 * No matching constraint (empty schema set) means no opinion: isValid is true.
 *
 * @param event   Event to validate.
 * @param _ingest Pipeline ingest; accepted for the call signature, not read.
 * @param opts    `contracts` to apply and whether to add the `format` check.
 * @returns The verdict plus one issue (level `error`) per reported error unit.
 */
export function validateEventAgainstContract(
  event: WalkerOS.DeepPartialEvent,
  _ingest: Ingest | undefined,
  opts: { contracts?: ContractSource[]; format?: boolean },
): { isValid: boolean; errors: ValidationIssue[] } {
  const schemas = collectSchemas(event, opts);

  const errors: ValidationIssue[] = [];
  for (const schema of schemas) {
    const result = getValidator(schema).validate(event);
    if (!result.valid) {
      for (const unit of result.errors) {
        errors.push({
          path: unit.instanceLocation,
          message: unit.error,
          level: 'error',
        });
      }
    }
  }

  return { isValid: errors.length === 0, errors };
}
properties', () => { + const result = createTagger()().scoped({ size: 'L', color: 'red' }).get(); + expect(result).toMatchObject({ 'data-elb_': 'size:L;color:red' }); + }); + + test('accumulates multiple scoped calls', () => { + const result = createTagger()() + .scoped('size', 'L') + .scoped({ color: 'red' }) + .scoped('fit', 'slim') + .get(); + expect(result).toMatchObject({ + 'data-elb_': 'size:L;color:red;fit:slim', + }); + }); + + test('is entity-agnostic (coexists with entity and data)', () => { + const result = createTagger()() + .entity('product') + .data('name', 'A') + .scoped('size', 'L') + .get(); + expect(result).toMatchObject({ + 'data-elb': 'product', + 'data-elb-product': 'name:A', + 'data-elb_': 'size:L', + }); + }); + + test('respects custom prefix', () => { + const result = createTagger({ prefix: 'custom-prefix' })() + .scoped('size', 'L') + .get(); + expect(result).toMatchObject({ 'custom-prefix_': 'size:L' }); + }); + + test('escapes special characters like other methods', () => { + const result = createTagger()().scoped('k', "a;b:c'd\\e").get(); + expect(result).toMatchObject({ 'data-elb_': "k:a\\;b\\:c\\'d\\\\e" }); + }); + }); + describe('Link Method', () => { test('single id and type', () => { const result = createTagger()().link('details', 'parent').get(); @@ -408,6 +456,22 @@ describe('Tagger', () => { }); }); + test('full chain including scoped generic', () => { + const result = createTagger()() + .entity('product') + .data('name', 'A') + .scoped('size', 'L') + .action('click', 'select') + .get(); + + expect(result).toMatchObject({ + 'data-elb': 'product', + 'data-elb-product': 'name:A', + 'data-elb_': 'size:L', + 'data-elbaction': 'click:select', + }); + }); + test('entity change mid-chain', () => { const result = createTagger()('product') .data('id', 123) diff --git a/packages/web/sources/browser/src/__tests__/walker.test.ts b/packages/web/sources/browser/src/__tests__/walker.test.ts index 7dbff761a..0a4df0b6d 100644 --- 
a/packages/web/sources/browser/src/__tests__/walker.test.ts +++ b/packages/web/sources/browser/src/__tests__/walker.test.ts @@ -1,5 +1,6 @@ import { getAllEvents, getEvents, getGlobals } from '../walker'; import { Triggers } from '../trigger'; +import { createTagger } from '../tagger'; import fs from 'fs'; describe('Walker', () => { @@ -521,6 +522,23 @@ describe('Walker', () => { getEvents(getElem('scoped-blanket-guard'), Triggers.Click), ).toMatchObject([{ entity: 'bg', data: { leak: 'everywhere' } }]); }); + + test('tagger-generated data-elb_ round-trips through the walker', () => { + const attrs = createTagger()() + .entity('product') + .data('name', 'A') + .scoped('size', 'L') + .action('click') + .get(); + + const el = document.createElement('div'); + Object.entries(attrs).forEach(([k, v]) => el.setAttribute(k, v)); + document.body.appendChild(el); + + expect(getEvents(el, Triggers.Click)).toMatchObject([ + { entity: 'product', action: 'click', data: { name: 'A', size: 'L' } }, + ]); + }); }); function getElem(selector: string) { diff --git a/packages/web/sources/browser/src/tagger.ts b/packages/web/sources/browser/src/tagger.ts index f0b870ac8..ae531d15e 100644 --- a/packages/web/sources/browser/src/tagger.ts +++ b/packages/web/sources/browser/src/tagger.ts @@ -9,6 +9,8 @@ export interface TaggerInstance { entity: (name: string) => TaggerInstance; data: ((key: string, value: WalkerOS.Property) => TaggerInstance) & ((data: WalkerOS.Properties) => TaggerInstance); + scoped: ((key: string, value: WalkerOS.Property) => TaggerInstance) & + ((scoped: WalkerOS.Properties) => TaggerInstance); action: ((trigger: string, action?: string) => TaggerInstance) & ((actions: Record) => TaggerInstance); actions: ((trigger: string, action?: string) => TaggerInstance) & @@ -38,6 +40,7 @@ export function createTagger( let currentEntity: string | undefined = undefined; // Only set via .entity() method let namingEntity: string | undefined = entity; // Used for data attribute 
naming const dataProperties: Record = {}; + const scopedProperties: WalkerOS.Properties = {}; const actionProperties: Record = {}; const actionsProperties: Record = {}; const contextProperties: WalkerOS.Properties = {}; @@ -92,6 +95,19 @@ export function createTagger( return instance; }, + scoped( + keyOrScoped: string | WalkerOS.Properties, + value?: WalkerOS.Property, + ): TaggerInstance { + if (isString(keyOrScoped)) { + scopedProperties[keyOrScoped] = value; + } else { + Object.assign(scopedProperties, keyOrScoped); + } + + return instance; + }, + action( triggerOrActions: string | Record, actionValue?: string, @@ -197,6 +213,12 @@ export function createTagger( } }); + // Add scoped generic attribute (data-elb_): branch-scoped, bubble-up + // only. Suffix must match Const.Commands.Scoped ('_'). + if (Object.keys(scopedProperties).length > 0) { + attributes[`${prefix}_`] = serializeKeyValue(scopedProperties); + } + // Add action attributes if (Object.keys(actionProperties).length > 0) { attributes[`${prefix}action`] = serializeKeyValue(actionProperties); diff --git a/skills/walkeros-understanding-flow/SKILL.md b/skills/walkeros-understanding-flow/SKILL.md index 3d4e51743..39f077eda 100644 --- a/skills/walkeros-understanding-flow/SKILL.md +++ b/skills/walkeros-understanding-flow/SKILL.md @@ -168,14 +168,16 @@ for the full store interface and lifecycle. ## Step-Level Primitives Every step (source, transformer, destination) supports a small set of inline -primitives alongside its package wiring. `cache`, `mapping`, and `consent` are -the long-established ones; `validate?` is the newest. `validate:` declares -validation intent (format check, per-event JSON Schemas, or a single generic -schema) inline on a step, just like `cache` declares caching intent. It is a -declarative description: consumers (CLI tooling, MCP, custom runners) decide how -to enforce it. See +primitives alongside its package wiring: `cache`, `mapping`, and `consent`. 
+ +Event shapes are not a step-level primitive. They live in the top-level +`contract` block (a sibling of `flows`) as named JSON Schemas, and enforcement +is an explicit `@walkeros/transformer-validate` step that references a contract +via `$contract.<name>`. See +[Website: Contract](../../website/docs/getting-started/flow/contract.mdx) for +the contract shape and [Website: Validate](../../website/docs/getting-started/flow/validate.mdx) for -the full shape. +runtime enforcement. ### How a step references its implementation diff --git a/skills/walkeros-understanding-transformers/SKILL.md b/skills/walkeros-understanding-transformers/SKILL.md index fce3b4441..6044c7e25 100644 --- a/skills/walkeros-understanding-transformers/SKILL.md +++ b/skills/walkeros-understanding-transformers/SKILL.md @@ -25,6 +25,28 @@ or deliver (destinations)—they modify events in-flight. | **Enrich** | Add server-side data to events | User segments, geo data | | **Redact** | Remove sensitive data before destinations | Strip PII, anonymize IPs | +## Available Packages + +| Package | Env | Purpose | +| ------------------------------------------ | ------ | -------------------------------------------------- | +| `@walkeros/transformer-validate` | both | Enforce JSON Schema contracts on events at runtime | +| `@walkeros/transformer-ga4` | server | Decode GA4 Measurement Protocol hits into events | +| `@walkeros/server-transformer-bot` | server | Annotate bot / AI-agent scores | +| `@walkeros/server-transformer-fingerprint` | server | Derive a stable visitor fingerprint | + +### Contract validation + +`@walkeros/transformer-validate` is the runtime arm of a +[contract](../../website/docs/getting-started/flow/contract.mdx). Event shapes +live in the top-level `contract` block; the transformer references one via +`$contract.<name>` in its `contract` setting and validates the canonical event. 
+`mode: "strict"` drops invalid events (chain-stop); `mode: "pass"` (default) +annotates `event.source.valid` and continues so a downstream step can route on +the verdict. `format: true` additionally checks the canonical `WalkerOS.Event` +structure. Filtering is the same mechanism: an inline schema that rejects the +unwanted events plus `mode: "strict"`, there is no separate `ignore` field. See +[Website: Validate transformer](../../website/docs/transformers/validate.mdx). + ## Transformer Interface See diff --git a/website/docs/getting-started/flow/contract.mdx b/website/docs/getting-started/flow/contract.mdx index b155ee827..7318d72b0 100644 --- a/website/docs/getting-started/flow/contract.mdx +++ b/website/docs/getting-started/flow/contract.mdx @@ -32,13 +32,13 @@ A contract is the schema for your event data: which fields are required, what ty ## Why use a contract -A contract is a single, inheritable description of what your events should look like. It does not enforce anything at runtime: tools and humans read it for governance, documentation, and schema-driven workflows. Without a contract, schema rules can live inline (`validate:` on each step), which duplicates across flows. +A contract is a single, inheritable description of what your events should look like. The contract itself describes; enforcement is an explicit [`@walkeros/transformer-validate`](/docs/transformers/validate) step that references it. Tools and humans also read the contract for governance, documentation, and schema-driven workflows. - **Single source of truth.** Define `product add` requirements once. Every flow that ships these events references the same definition. - **Inheritance.** Layer additional rules on top with `extend` (for example, `web_loggedin` extend `web` extend `default`) rather than copying. - **Versioned.** `tagging` tracks contract revisions alongside the events they govern. 
- **Self-documenting.** Each schema is JSON Schema, so `description` and `examples` annotate fields the same way humans and tools read them. -- **Decoupled from enforcement.** Contracts describe what events should look like, step-level `validate:` references them, consumers decide whether to enforce. +- **Decoupled from enforcement.** The contract describes what events should look like; the validate transformer enforces it where you place it in the pipeline. - **Composable with the rest of the config.** Reference fragments anywhere via `$contract..` (see [Reference syntax](/docs/guides/reference-syntax#contract--contract-references)). If your flow has a single throwaway event, you do not need a contract. Reach for one as soon as the same shape needs to hold across more than one flow. @@ -234,15 +234,13 @@ Annotations stay on the source contract for tooling (CLI hints, IDE descriptions ## `$contract` references -Reference any part of a resolved contract with `$contract..`. The contract is fully resolved (extend + wildcards) before path access, so the returned value is the merged shape, not the raw entry. +Reference any part of a resolved contract with `$contract..`. The contract is fully resolved (extend + wildcards) before path access, so the returned value is the merged shape, not the raw entry. 
A validate transformer references a contract via its `contract` setting: ```json -"destinations": { - "ga4": { - "validate": { - "schema": "$contract.web.schema", - "events": "$contract.web.events" - } +"transformers": { + "validate": { + "package": "@walkeros/transformer-validate", + "config": { "settings": { "contract": ["$contract.web"], "mode": "strict" } } } } ``` @@ -373,12 +371,10 @@ A shorter end-to-end illustration: "flows": { "web-shop": { "config": { "platform": "web" }, - "destinations": { - "ga4": { - "validate": { - "schema": "$contract.web_loggedin.schema", - "events": "$contract.web_loggedin.events" - } + "transformers": { + "validate": { + "package": "@walkeros/transformer-validate", + "config": { "settings": { "contract": ["$contract.web_loggedin"], "mode": "strict" } } } } } @@ -386,7 +382,7 @@ A shorter end-to-end illustration: } ``` -For `product add`, a `validate:` block that references `$contract.web_loggedin` sees these rules (all from the merged shape): +For `product add`, a validate transformer that references `$contract.web_loggedin` sees these rules (all from the merged shape): | Source | Rule | |--------|------| @@ -398,7 +394,7 @@ For `product add`, a `validate:` block that references `$contract.web_loggedin` ## Next steps -- **[Validate](/docs/getting-started/flow/validate)**: step-level `validate:` primitive that consumes contracts +- **[Validate](/docs/getting-started/flow/validate)**: enforce a contract at runtime with the validate transformer - **[Mapping](/docs/mapping)**: transform events between steps - **[Step examples](/docs/getting-started/flow/step-examples)**: pair every step with input/output fixtures - **[Reference syntax](/docs/guides/reference-syntax)**: all `$contract`, `$var`, `$flow`, `$store`, `$secret`, `$code:`, `$env` references diff --git a/website/docs/getting-started/flow/validate.mdx b/website/docs/getting-started/flow/validate.mdx index b333b8a32..a87c7529e 100644 --- 
a/website/docs/getting-started/flow/validate.mdx +++ b/website/docs/getting-started/flow/validate.mdx @@ -1,63 +1,50 @@ --- title: Validate -description: Step-level validation primitive. Declare validation inline on any step. +description: Enforce a contract at runtime with the validate transformer. sidebar_position: 3 --- # Validate -`validate:` is a step-level primitive that attaches validation rules directly to a source, transformer, or destination. It is a declarative description, like `cache` or `consent`: consumers (CLI, MCP, custom runners) decide how to enforce. +A [contract](/docs/getting-started/flow/contract) is the canonical definition of what your events should look like. Validation enforces that contract at runtime with an explicit transformer step, `@walkeros/transformer-validate`, the same way enrichment or decoding is a transformer step. The contract describes the shape; the transformer applies it. + +## Where event shapes live + +Event shapes live in the top-level `contract` block of flow.json (a sibling of `flows`), keyed by entity then action as JSON Schema, with `extend` inheritance and `*` wildcards. Reference any contract from a step with `$contract.`. ```json -"destinations": { - "ga4": { - "package": "@walkeros/web-destination-gtag", - "validate": { - "events": { - "order": { - "complete": { "properties": { "data": { "required": ["total"] } } } - } - } - } +{ + "version": 4, + "contract": { + "web": { "events": { "order": { "complete": { "properties": { "data": { "required": ["total", "currency"] } } } } } } } } ``` -## The shape - -`validate` is an object with three optional fields. At least one must be set for the primitive to do anything. - -| Field | Type | Description | -|-------|------|-------------| -| `format?` | `boolean` | Check the full `WalkerOS.Event` structural shape (name, source, timestamp, etc.) 
| -| `events?` | `Record>` | Per-event JSON Schemas keyed by entity then action, with `*` wildcard fallback | -| `schema?` | `JsonSchema` | A single JSON Schema applied to every event that reaches the step | - -## events or schema - -Per-event rules go in `events`. Apply-to-every-event rules go in `schema`. - -## Step-level usage - -`validate:` lives on the step it protects. Same shape on every step type. +See [Contract](/docs/getting-started/flow/contract) for the full shape, inheritance with `extend`, and wildcard merging. -### On a destination +## Enforce at runtime -Declare that orders without a total should not reach GA4. Other destinations on the same flow are unaffected by this declaration. +Add a `@walkeros/transformer-validate` step and point its `contract` setting at the contract. The transformer reads the `{ ingest, event }` context and validates the canonical event. -### On a source +### Settings -Declare ingest payloads that miss a required field as invalid before they hit the collector. +| Field | Type | Description | +|-------|------|-------------| +| `contract?` | `Array<$contract ref \| JSON Schema>` | Constraints. A `$contract.` ref selects per `entity.action`; an inline schema applies to the whole event. All entries must pass (AND). | +| `format?` | `boolean` | Also validate the canonical `WalkerOS.Event` structure. All fields are optional, so this checks shape and field types, not presence. | +| `mode?` | `'strict' \| 'pass'` | `strict` drops invalid events (stops the chain); `pass` (default) annotates and continues so a downstream route can act. | +| `output?` | `{ isValid?, errors? }` | Split targets. `isValid` (default `source.valid`) writes the boolean verdict onto the event; `errors` (default `validation`) writes the issue list onto the ingest context. Empty string skips a write. | - +The verdict travels with the event as analytics-grade data (`event.source.valid`). 
The issue list (`{ path, message }` entries) stays off the event, on the observer-visible ingest context, so it survives even a strict-mode drop. -### On a transformer +## Filter unwanted events -Declare a precondition for a transformer, for example that a downstream enrichment transformer never sees an event without a user id. +Filtering is the same mechanism, not a separate setting. Author an inline schema that rejects the unwanted events and run `mode: "strict"`. For example, drop GTM lifecycle noise (`gtm.js`, `gtm.dom`) by rejecting any `name` matching `^gtm\.`: -## Failure semantic +## Design-time checks -`validate:` is declarative, runtime behavior depends on the consumer. The typical interpretation is local to the step: a failed validation skips the step it is attached to and other destinations, transformers, and sources on the same flow continue to receive the event. Treat this as the recommended convention, not a guaranteed runtime contract. +The CLI flow validator checks step examples (`destination.in`, `transformer.in`) against the resolved contract and reports violations before you deploy, so a sample event that breaks the contract is caught at authoring time. +```bash +walkeros validate flow.json ``` - ┌──────────────────────────┐ - │ ga4 (validate fails) │ -event "order complete" ──┤ skipped │ - │ │ - │ meta (no validate here) │ - │ receives event │ - └──────────────────────────┘ -``` - -This local-to-the-step semantic is the key difference from a flow-level validator that drops events globally. - -## Lifecycle position - -The recommended logical position is after the step's `before` chain and before the step's main action. On a source that places validation in front of the collector, on a transformer or destination it sits in front of the main push. The validator sees the event in the same shape the step itself would see. - -## Wiring from a contract - -`validate:` accepts `$contract..` references just like any other config value. 
Define rules once in a contract, reference them from each step: - - - -See [Contract](/docs/getting-started/flow/contract) for the full contract shape, inheritance with `extend`, and wildcard merging. - -## Format layer - -Setting `format: true` declares that the full `WalkerOS.Event` structural check should run on the input. That covers required top-level fields (`name`, `source`, `timestamp`, `id`, ...), the entity-action `event name` pattern, `source.type` being one of the known kinds, and the basic shape of nested objects (`data`, `context`, `globals`, `user`, `consent`). Use it as a cheap sanity gate when the step receives data from arbitrary callers, skip it when the input is already a `WalkerOS.Event` produced by the collector. ## Next steps - **[Contract](/docs/getting-started/flow/contract)**: named, inheritable schemas referenced via `$contract` +- **[Validate transformer](/docs/transformers/validate)**: full settings reference and recipes - **[Mapping](/docs/mapping)**: transform events between steps diff --git a/website/docs/sources/web/browser/tagger.mdx b/website/docs/sources/web/browser/tagger.mdx index ed1f84aa4..f01b264fc 100644 --- a/website/docs/sources/web/browser/tagger.mdx +++ b/website/docs/sources/web/browser/tagger.mdx @@ -138,6 +138,28 @@ const attributes = tagger() language="typescript" /> +### Scoped generic properties + +`scoped()` emits the `data-elb_` attribute: a generic property that only reaches +triggers nested below the element, unlike `data-elb-` which every trigger in the +entity receives. It uses the same value syntax and escaping as the other methods. + + + ### Multiple entity scopes +##### `scoped(key: string, value: Property)` | `scoped(object: Properties)` + +Adds path-scoped generic properties. Creates a `data-elb_` attribute that only +applies to triggers nested below the element. + + + ##### `link(id: string, type: string)` | `link(object: Record)` Adds link relationships between elements. 
diff --git a/website/docs/transformers/validate.mdx b/website/docs/transformers/validate.mdx new file mode 100644 index 000000000..8ed9f346d --- /dev/null +++ b/website/docs/transformers/validate.mdx @@ -0,0 +1,129 @@ +--- +title: Validate +description: 'Enforce JSON Schema contracts on events at runtime' +sidebar_position: 3 +package: '@walkeros/transformer-validate' +--- + +import data from '@walkeros/transformer-validate/walkerOS.json'; +import Configuration from '@site/src/components/snippets/_configuration.mdx'; + +# Validate + + + +Checks events against JSON Schema [contracts](/docs/getting-started/flow/contract) and records a verdict. In `pass` mode it annotates the event and continues so a downstream step can route on the result. In `strict` mode it drops invalid events by stopping the chain. The transformer runs on both web and server. + +## Installation + + + + + + + + + + + + + + + + +## How it works + +The transformer reads the `{ ingest, event }` context and validates the canonical event. Each `contract` entry is a constraint: a `$contract.` reference resolves to the entity-action schemas for the event, an inline JSON Schema applies to the whole event. All entries are AND-ed and every error is aggregated. Set `format: true` to additionally check the canonical `WalkerOS.Event` structure. + +`mode` decides what happens to an invalid event: + +- **`pass`** (default): write the verdict to the event and continue. A downstream destination or transformer routes on `event.source.valid`. +- **`strict`**: record the errors, then stop the chain so the event never reaches downstream steps. + +The verdict and the error list are written to two different places. The boolean verdict goes onto the **event** (`output.isValid`, default `source.valid`) as analytics-grade data that travels with the event. The issue list goes onto the **ingest** (`output.errors`, default `validation`) as observer-visible diagnostics, never event data, so it survives even a strict-mode drop. 
Set either path to an empty string to skip that write. + + + +## Reference a contract + +The canonical place for event shapes is the top-level `contract` block. Reference it with `$contract.<name>`; the bundler resolves the reference to a concrete schema before deploy, so the runtime transformer only ever sees resolved schemas. + + + +## Filter unwanted events + +There is no separate `ignore` or `filter` setting. To drop unwanted events (for example GTM lifecycle pings like `gtm.js` / `gtm.dom`), author an inline schema that rejects them and run `mode: "strict"`. A schema where `name` must NOT match `^gtm\.` fails those events while real events pass. + + + +## Limitations + +- The error list may contain an extra parent entry pointing at a `properties` wrapper per failure, a quirk of the underlying JSON Schema engine. The `isValid` verdict is unaffected: it is `false` exactly when there is at least one failure. +- Issues are emitted at `level: "error"`; there is no warn level. + +## Next steps + +- **[Contract](/docs/getting-started/flow/contract)** - named, inheritable event schemas referenced via `$contract` +- **[Create your own](./create-your-own)** - build custom transformers diff --git a/website/sidebars.ts b/website/sidebars.ts index 6a58134e8..6526a8e6c 100644 --- a/website/sidebars.ts +++ b/website/sidebars.ts @@ -143,6 +143,7 @@ const sidebars: SidebarsConfig = { 'transformers/bot', 'transformers/file', 'transformers/fingerprint', + 'transformers/validate', 'transformers/create-your-own', ], }, From 0cad016c77b6c4888b4f82f19a66e574ecd6e13f Mon Sep 17 00:00:00 2001 From: alexanderkirtzel Date: Tue, 9 Jun 2026 22:28:00 +0200 Subject: [PATCH 2/2] feedback --- .changeset/validate-schema-only-fix.md | 7 +++++++ .../validate/src/__tests__/validate.test.ts | 19 +++++++++++++++++++ .../transformers/validate/src/validate.ts | 10 +++++----- website/docs/transformers/validate.mdx | 2 +- 4 files changed, 32 insertions(+), 6 deletions(-) create mode 100644
.changeset/validate-schema-only-fix.md diff --git a/.changeset/validate-schema-only-fix.md b/.changeset/validate-schema-only-fix.md new file mode 100644 index 000000000..902c551c4 --- /dev/null +++ b/.changeset/validate-schema-only-fix.md @@ -0,0 +1,7 @@ +--- +'@walkeros/transformer-validate': patch +--- + +Fix schema-only contract rules being skipped during validation. A contract rule +that carries only a whole-event `schema` (no `events` block) is now enforced +instead of being treated as an inert inline schema. diff --git a/packages/transformers/validate/src/__tests__/validate.test.ts b/packages/transformers/validate/src/__tests__/validate.test.ts index 8cb5d884a..29c5e4d21 100644 --- a/packages/transformers/validate/src/__tests__/validate.test.ts +++ b/packages/transformers/validate/src/__tests__/validate.test.ts @@ -95,6 +95,25 @@ describe('validateEventAgainstContract', () => { expect(result).toEqual({ isValid: true, errors: [] }); }); + it('enforces a schema-only contract rule (no events block)', () => { + const schemaOnlyRule = { + description: 'whole-event shape', + schema: { + type: 'object', + required: ['data'], + properties: { data: { type: 'object', required: ['id'] } }, + }, + }; + + const result = validateEventAgainstContract( + { entity: 'order', action: 'complete' }, // no `data` → must fail + undefined, + { contracts: [schemaOnlyRule] }, + ); + + expect(result.isValid).toBe(false); + }); + it('applies an inline whole-event JSON Schema (no events key)', () => { const event: WalkerOS.DeepPartialEvent = { name: 'page view', diff --git a/packages/transformers/validate/src/validate.ts b/packages/transformers/validate/src/validate.ts index 0b746888b..cbc82dc5b 100644 --- a/packages/transformers/validate/src/validate.ts +++ b/packages/transformers/validate/src/validate.ts @@ -29,12 +29,12 @@ export function getValidator( } function isContractRule(source: ContractSource): source is Flow.ContractRule { - return ( - typeof source === 'object' && - source 
!== null && + if (typeof source !== 'object' || source === null) return false; + const hasEvents = 'events' in source && - typeof (source as { events?: unknown }).events === 'object' - ); + typeof source.events === 'object' && + source.events !== null; + return hasEvents || 'schema' in source; } /** diff --git a/website/docs/transformers/validate.mdx b/website/docs/transformers/validate.mdx index 8ed9f346d..57f722475 100644 --- a/website/docs/transformers/validate.mdx +++ b/website/docs/transformers/validate.mdx @@ -62,7 +62,7 @@ await startFlow({ ## How it works -The transformer reads the `{ ingest, event }` context and validates the canonical event. Each `contract` entry is a constraint: a `$contract.` reference resolves to the entity-action schemas for the event, an inline JSON Schema applies to the whole event. All entries are AND-ed and every error is aggregated. Set `format: true` to additionally check the canonical `WalkerOS.Event` structure. +The transformer reads the `{ ingest, event }` context and validates the canonical event. Each `contract` entry is a constraint: a `$contract.` reference resolves to the entity-action schemas for the event, an inline JSON Schema applies to the whole event. All entries are AND-ed and every error is aggregated. Set `format: true` to additionally check the canonical partial event shape (field types and structure); required-field rules come from `contract`. `mode` decides what happens to an invalid event: