diff --git a/docs/adr/0006-neural-learner-adapter-contract.md b/docs/adr/0006-neural-learner-adapter-contract.md new file mode 100644 index 0000000..7ea4fcb --- /dev/null +++ b/docs/adr/0006-neural-learner-adapter-contract.md @@ -0,0 +1,39 @@ +# ADR 0006: Neural Learner Adapter Contract + +## Status + +Accepted + +## Context + +IgnitionRL needs DQN, PPO and SAC later, but environment authors should not see algorithm-specific internals. The existing public boundary is already the TypeScript environment contract: vector observations, action specs, rewards, done conditions, traces and checkpoints. + +M3.1 needs an adapter contract before implementing neural algorithms so Studio, CI and future native backends can agree on action-space support, update cadence, metrics and checkpoint shape. + +## Decision + +Add a `@ignitionrl/learning` neural adapter contract that is derived from `EnvironmentSpec`: + +- `NeuralObservationSpace` mirrors vector observation shape and dtype. +- `NeuralActionSpace` normalizes discrete, continuous and multi-discrete action specs. +- `NeuralUpdateCadence` models step-based updates, episode-based updates and rollout-based updates. +- `NeuralMetricSpec` defines stable metric names, scopes, reducers and optimization direction. +- `NeuralCheckpointEnvelope` wraps backend-specific checkpoint payloads in a stable JSON envelope. + +The built-in algorithm support profiles are: + +- DQN: discrete action spaces. +- PPO: discrete, continuous and multi-discrete action spaces. +- SAC: continuous action spaces. + +Unsupported action spaces fail before training starts. Custom adapters must declare supported action spaces explicitly. + +## Consequences + +Future DQN, PPO and SAC implementations can plug in without changing `defineEnvironment()`. + +Studio can reason about learner metrics and checkpoint payloads before the actual neural backend exists. 
+ +Native backends can keep tensor payloads opaque while still producing stable IgnitionRL metadata around observations, actions, update cadence and metrics. + +The current tabular Q-learning and linear policy search baselines remain unchanged; they map conceptually to the same boundaries but are not forced through the neural adapter until a concrete neural implementation needs it. diff --git a/packages/learning/README.md b/packages/learning/README.md index 1153c42..f67ade7 100644 --- a/packages/learning/README.md +++ b/packages/learning/README.md @@ -81,6 +81,38 @@ Checkpoints are JSON-serializable and include: `linear-policy-search` keeps v1 checkpoint loading backward-compatible when newer diagnostic metrics are missing. Loading normalizes those fields before inference, so older demo artifacts can still be replayed through the current learner. +## Neural Learner Adapter Contract + +`@ignitionrl/learning` now exposes an adapter contract for future neural learners without changing environment definitions: + +```ts +import { defineNeuralLearnerAdapterContract } from "@ignitionrl/learning" +import { Target2D } from "@ignitionrl/examples" + +const contract = defineNeuralLearnerAdapterContract(Target2D.getSpec(), { + algorithm: "dqn", +}) +``` + +The contract records: + +- vector observation shape and dtype; +- discrete, continuous or multi-discrete action space details; +- algorithm support rules for DQN, PPO and SAC; +- update cadence (`step`, `episode` or `rollout`); +- stable metric names for Studio and CI; +- a JSON checkpoint envelope with an opaque backend payload. + +Built-in support profiles are intentionally conservative: + +- `dqn`: discrete action spaces; +- `ppo`: discrete, continuous and multi-discrete action spaces; +- `sac`: continuous action spaces. + +Unsupported combinations fail before a run starts with an algorithm-specific error. Custom neural adapters must declare their supported action spaces explicitly. 
+ +Current `TabularQLearner` and `LinearPolicySearchLearner` remain direct TypeScript learners. Future DQN/PPO/SAC implementations can sit behind this contract whether the backend is TypeScript, Rust/Burn, Rust/Candle or another native process. Environment authors still only implement `defineEnvironment()`. + ## Scope Use this package to prove that environments can be trained and reloaded through the public contract. Neural RL algorithms and native training backends should come later behind the same `Learner` and checkpoint boundary. diff --git a/packages/learning/src/index.ts b/packages/learning/src/index.ts index 28d0572..39a8dcf 100644 --- a/packages/learning/src/index.ts +++ b/packages/learning/src/index.ts @@ -34,6 +34,37 @@ export { type SelectActionOptions, type TabularQOptions, } from "./tabular-q.js"; +export { + NEURAL_ADAPTER_CONTRACT_VERSION, + NEURAL_CHECKPOINT_SCHEMA_VERSION, + assertActionSpaceSupported, + assertNeuralCheckpointEnvelope, + createNeuralCheckpointEnvelope, + defaultMetricSpecs, + defineNeuralLearnerAdapterContract, + neuralActionSpaceFromSpec, + neuralAdapterContractToJson, + neuralObservationSpaceFromSpec, + type BuiltInNeuralLearnerAlgorithm, + type CreateNeuralCheckpointEnvelopeOptions, + type DefineNeuralLearnerAdapterContractOptions, + type NeuralActionSpace, + type NeuralActionSpaceKind, + type NeuralBackendDescriptor, + type NeuralCheckpointContract, + type NeuralCheckpointEnvelope, + type NeuralContinuousActionSpace, + type NeuralDiscreteActionSpace, + type NeuralLearnerAdapterContract, + type NeuralLearnerAlgorithm, + type NeuralMetricDirection, + type NeuralMetricReducer, + type NeuralMetricScope, + type NeuralMetricSpec, + type NeuralMultiDiscreteActionSpace, + type NeuralObservationSpace, + type NeuralUpdateCadence, +} from "./neural-adapter.js"; export { trainLinearPolicySearch, trainTabularQ, diff --git a/packages/learning/src/neural-adapter.ts b/packages/learning/src/neural-adapter.ts new file mode 100644 index 
import {
  actionSize,
  validateActionSpec,
  type ActionSpec,
  type ContinuousActionSpec,
  type DiscreteActionSpec,
  type DiscreteActionValue,
  type EnvironmentSpec,
  type JsonObject,
  type JsonValue,
  type MultiDiscreteActionSpec,
  type ObservationSpec,
} from "@ignitionrl/core";

/** Schema version written into every adapter contract built by this module. */
export const NEURAL_ADAPTER_CONTRACT_VERSION = 1;
/** Schema version written into, and required of, every checkpoint envelope. */
export const NEURAL_CHECKPOINT_SCHEMA_VERSION = 1;

/** Algorithm ids that ship with a built-in action-space support profile. */
export type BuiltInNeuralLearnerAlgorithm = "dqn" | "ppo" | "sac";
/**
 * Any algorithm id. The `(string & {})` arm accepts custom ids while keeping
 * the built-in literals visible to editor autocomplete.
 */
export type NeuralLearnerAlgorithm = BuiltInNeuralLearnerAlgorithm | (string & {});
/** Discriminant shared by every {@link NeuralActionSpace} variant. */
export type NeuralActionSpaceKind = "discrete" | "continuous" | "multi-discrete";

/** Rank-1 float32 observation vector; `size` repeats `shape[0]`. */
export type NeuralObservationSpace = {
  readonly type: "vector";
  readonly shape: readonly [number];
  readonly dtype: "float32";
  readonly size: number;
};

/** Single categorical action with `n` choices; this module always emits `size: 1`. */
export type NeuralDiscreteActionSpace = {
  readonly type: "discrete";
  readonly size: number;
  readonly n: number;
  readonly values?: readonly DiscreteActionValue[];
  readonly labels?: readonly string[];
};

/** Real-valued action vector with per-component `low`/`high` bounds. */
export type NeuralContinuousActionSpace = {
  readonly type: "continuous";
  readonly size: number;
  readonly shape: readonly number[];
  readonly low: readonly number[];
  readonly high: readonly number[];
  readonly labels?: readonly string[];
};

/** Several categorical actions taken together; `nvec[i]` is the choice count of component `i`. */
export type NeuralMultiDiscreteActionSpace = {
  readonly type: "multi-discrete";
  readonly size: number;
  readonly nvec: readonly number[];
  readonly labels?: readonly string[];
};

/** Normalized action space, discriminated by `type`. */
export type NeuralActionSpace =
  | NeuralDiscreteActionSpace
  | NeuralContinuousActionSpace
  | NeuralMultiDiscreteActionSpace;

/** When a learner runs its update: every N steps, every N episodes, or per rollout. */
export type NeuralUpdateCadence =
  | {
      readonly type: "step";
      readonly everySteps: number;
      readonly warmupSteps?: number;
      readonly batchSize?: number;
    }
  | {
      readonly type: "episode";
      readonly everyEpisodes: number;
      readonly batchSize?: number;
    }
  | {
      readonly type: "rollout";
      readonly rolloutSteps: number;
      readonly epochs?: number;
      readonly minibatches?: number;
    };

export type NeuralMetricScope = "episode" | "update" | "rollout" | "run";
export type NeuralMetricDirection = "maximize" | "minimize" | "none";
export type NeuralMetricReducer = "last" | "mean" | "min" | "max" | "sum";

/** Declaration of one metric a learner reports: its name, scope, direction and reducer. */
export type NeuralMetricSpec = {
  readonly name: string;
  readonly scope: NeuralMetricScope;
  readonly direction: NeuralMetricDirection;
  readonly reducer: NeuralMetricReducer;
  readonly unit?: string;
  readonly description?: string;
};

/** Identifies the backend that runs (or ran) the learner. */
export type NeuralBackendDescriptor = {
  readonly kind: "typescript" | "native" | "remote";
  readonly name: string;
  readonly capabilities?: readonly string[];
};

/** Checkpoint section of a contract: envelope format, payload kind and required fields. */
export type NeuralCheckpointContract = {
  readonly schemaVersion: typeof NEURAL_CHECKPOINT_SCHEMA_VERSION;
  readonly format: "ignitionrl.neural-checkpoint";
  readonly payload: "opaque-json";
  readonly requiredEnvelopeFields: readonly string[];
};

/** The full adapter contract derived from an `EnvironmentSpec` plus learner options. */
export type NeuralLearnerAdapterContract = {
  readonly kind: "ignitionrl.neural-learner-adapter-contract";
  readonly schemaVersion: typeof NEURAL_ADAPTER_CONTRACT_VERSION;
  readonly algorithm: string;
  readonly envId: string;
  readonly observation: NeuralObservationSpace;
  readonly actionSpace: NeuralActionSpace;
  readonly supportedActionSpaces: readonly NeuralActionSpaceKind[];
  readonly updateCadence: NeuralUpdateCadence;
  readonly metrics: readonly NeuralMetricSpec[];
  readonly checkpoint: NeuralCheckpointContract;
  readonly backend?: NeuralBackendDescriptor;
};

/** Options for {@link defineNeuralLearnerAdapterContract}; only `algorithm` is required. */
export type DefineNeuralLearnerAdapterContractOptions = {
  readonly algorithm: NeuralLearnerAlgorithm;
  readonly supportedActionSpaces?: readonly NeuralActionSpaceKind[];
  readonly updateCadence?: NeuralUpdateCadence;
  readonly metrics?: readonly NeuralMetricSpec[];
  readonly backend?: NeuralBackendDescriptor;
};

/** JSON envelope wrapping a backend-specific checkpoint `payload`. */
export type NeuralCheckpointEnvelope = {
  readonly kind: "ignitionrl.neural-learner-checkpoint";
  readonly schemaVersion: typeof NEURAL_CHECKPOINT_SCHEMA_VERSION;
  readonly algorithm: string;
  readonly envId: string;
  readonly observation: NeuralObservationSpace;
  readonly actionSpace: NeuralActionSpace;
  readonly updateCadence: NeuralUpdateCadence;
  readonly metrics: Readonly<Record<string, number>>;
  readonly payload: JsonObject;
  readonly createdAt: string;
  readonly backend?: NeuralBackendDescriptor;
  readonly metadata?: JsonObject;
};

/** Optional fields a caller may supply when building a checkpoint envelope. */
export type CreateNeuralCheckpointEnvelopeOptions = {
  readonly metrics?: Readonly<Record<string, number>>;
  readonly createdAt?: string;
  readonly backend?: NeuralBackendDescriptor;
  readonly metadata?: JsonObject;
};

// Envelope fields advertised through `contract.checkpoint.requiredEnvelopeFields`.
const REQUIRED_CHECKPOINT_FIELDS = [
  "kind",
  "schemaVersion",
  "algorithm",
  "envId",
  "observation",
  "actionSpace",
  "updateCadence",
  "metrics",
  "payload",
  "createdAt",
] as const;

// Action-space kinds each built-in algorithm accepts.
const BUILT_IN_ACTION_SUPPORT: Record<
  BuiltInNeuralLearnerAlgorithm,
  readonly NeuralActionSpaceKind[]
> = {
  dqn: ["discrete"],
  ppo: ["discrete", "continuous", "multi-discrete"],
  sac: ["continuous"],
};

/**
 * Derives a neural learner adapter contract from an environment spec.
 *
 * Normalizes the observation and action specs, resolves the supported action
 * spaces (explicit option first, built-in profile otherwise), rejects an
 * unsupported action space, and fills in default cadence and metrics when the
 * caller supplies none.
 *
 * @param spec Environment spec; `id`, `observation` and `actions` are read.
 * @param options Algorithm id plus optional overrides.
 * @returns A contract that owns its `metrics` and `requiredEnvelopeFields` arrays.
 * @throws Error when the algorithm id is blank, the action space is
 *   unsupported, or the cadence or metric specs are invalid.
 */
export function defineNeuralLearnerAdapterContract(
  spec: EnvironmentSpec,
  options: DefineNeuralLearnerAdapterContractOptions,
): NeuralLearnerAdapterContract {
  // Checkpoint validation rejects a blank algorithm, so fail here instead of
  // handing out a contract that can never produce a valid envelope.
  if (options.algorithm.trim().length === 0) {
    throw new Error("[IgnitionRL] Neural learner adapter algorithm must be a non-empty string.");
  }

  const observation = neuralObservationSpaceFromSpec(spec.observation);
  const actionSpace = neuralActionSpaceFromSpec(spec.actions);
  const supportedActionSpaces = supportedActionSpacesFor(
    options.algorithm,
    options.supportedActionSpaces,
  );

  assertActionSpaceSupported(options.algorithm, actionSpace.type, supportedActionSpaces);

  const updateCadence = options.updateCadence ?? defaultUpdateCadence(options.algorithm);
  validateUpdateCadence(updateCadence);

  // Copy so later mutation of the caller's array cannot change the contract.
  const metrics: readonly NeuralMetricSpec[] = [
    ...(options.metrics ?? defaultMetricSpecs(options.algorithm)),
  ];
  validateMetricSpecs(metrics);

  return {
    kind: "ignitionrl.neural-learner-adapter-contract",
    schemaVersion: NEURAL_ADAPTER_CONTRACT_VERSION,
    algorithm: options.algorithm,
    envId: spec.id,
    observation,
    actionSpace,
    supportedActionSpaces,
    updateCadence,
    metrics,
    checkpoint: {
      schemaVersion: NEURAL_CHECKPOINT_SCHEMA_VERSION,
      format: "ignitionrl.neural-checkpoint",
      payload: "opaque-json",
      // Copy so contracts never share the module-level constant.
      requiredEnvelopeFields: [...REQUIRED_CHECKPOINT_FIELDS],
    },
    ...(options.backend !== undefined ? { backend: options.backend } : {}),
  };
}
/**
 * Normalizes an environment observation spec into a neural observation space.
 *
 * @param spec Observation spec; must be a `vector` with exactly one dimension.
 * @returns A float32 vector space whose `size` equals `shape[0]`.
 * @throws Error when the spec is not a vector, is not rank-1, or has a
 *   non-positive or non-integer length.
 */
export function neuralObservationSpaceFromSpec(spec: ObservationSpec): NeuralObservationSpace {
  if (spec.type !== "vector") {
    throw new Error(`[IgnitionRL] Neural learners require vector observations, got ${String(spec.type)}.`);
  }

  // Only shape[0] is read below, so a longer shape would otherwise be
  // truncated silently and the reported size would be wrong.
  if (spec.shape.length !== 1) {
    throw new Error(
      `[IgnitionRL] Neural learners require rank-1 vector observations, got shape [${spec.shape.join(", ")}].`,
    );
  }

  const size = spec.shape[0];

  if (!Number.isInteger(size) || size <= 0) {
    throw new Error(`[IgnitionRL] Neural learner observation size must be positive, got ${String(size)}.`);
  }

  return {
    type: "vector",
    shape: [size],
    dtype: "float32",
    size,
  };
}

/**
 * Normalizes an environment action spec into a neural action space.
 *
 * The spec is first passed to `validateActionSpec`, then dispatched on `type`.
 *
 * @param spec Discrete, continuous or multi-discrete action spec.
 * @returns The matching {@link NeuralActionSpace} variant.
 */
export function neuralActionSpaceFromSpec(spec: ActionSpec): NeuralActionSpace {
  validateActionSpec(spec);

  if (spec.type === "discrete") {
    return neuralDiscreteActionSpace(spec);
  }

  if (spec.type === "continuous") {
    return neuralContinuousActionSpace(spec);
  }

  return neuralMultiDiscreteActionSpace(spec);
}

/**
 * Throws unless `actionSpace` is one of the kinds the algorithm supports.
 *
 * @param algorithm Algorithm id, used in the error message and for the default lookup.
 * @param actionSpace Kind of the environment's action space.
 * @param supportedActionSpaces Allowed kinds; defaults to the built-in profile,
 *   which itself throws for algorithms without one.
 * @throws Error naming the algorithm, the rejected kind and the supported kinds.
 */
export function assertActionSpaceSupported(
  algorithm: string,
  actionSpace: NeuralActionSpaceKind,
  supportedActionSpaces: readonly NeuralActionSpaceKind[] = supportedActionSpacesFor(algorithm),
): void {
  if (supportedActionSpaces.includes(actionSpace)) {
    return;
  }

  throw new Error(
    `[IgnitionRL] ${algorithm} does not support ${actionSpace} action spaces. Supported action spaces: ${supportedActionSpaces.join(", ")}.`,
  );
}
/**
 * Builds a checkpoint envelope for `payload` from an adapter contract.
 *
 * Algorithm, environment id, observation, action space and cadence are copied
 * from the contract. The result is validated against the contract before it is
 * returned.
 *
 * @param contract Contract the checkpoint belongs to.
 * @param payload Backend-specific JSON payload, stored as given.
 * @param options Optional metrics, timestamp, backend override and metadata.
 *   `createdAt` defaults to the current time as an ISO string; `backend` falls
 *   back to the contract's backend.
 * @throws Error when a metric is not finite or the envelope fails validation.
 */
export function createNeuralCheckpointEnvelope(
  contract: NeuralLearnerAdapterContract,
  payload: JsonObject,
  options: CreateNeuralCheckpointEnvelopeOptions = {},
): NeuralCheckpointEnvelope {
  const backend = options.backend ?? contract.backend;

  const envelope: NeuralCheckpointEnvelope = {
    kind: "ignitionrl.neural-learner-checkpoint",
    schemaVersion: NEURAL_CHECKPOINT_SCHEMA_VERSION,
    algorithm: contract.algorithm,
    envId: contract.envId,
    observation: contract.observation,
    actionSpace: contract.actionSpace,
    updateCadence: contract.updateCadence,
    metrics: validateMetricValues(options.metrics ?? {}),
    payload,
    createdAt: options.createdAt ?? new Date().toISOString(),
    ...(backend !== undefined ? { backend } : {}),
    ...(options.metadata !== undefined ? { metadata: options.metadata } : {}),
  };

  assertNeuralCheckpointEnvelope(envelope, contract);

  return envelope;
}

/**
 * Asserts that `value` is a structurally valid checkpoint envelope and, when a
 * contract is given, that it belongs to that contract.
 *
 * Observation, action space and update cadence are validated on every call,
 * because the assertion narrows `value` to a type that requires them.
 *
 * @param value Untrusted value, e.g. parsed JSON.
 * @param contract Optional contract; when present, algorithm, environment id,
 *   action space (kind, size and per-kind dimensions) and observation size must match.
 * @throws Error describing the first failed check.
 */
export function assertNeuralCheckpointEnvelope(
  value: unknown,
  contract?: NeuralLearnerAdapterContract,
): asserts value is NeuralCheckpointEnvelope {
  if (!isRecord(value)) {
    throw new Error("[IgnitionRL] Neural checkpoint envelope must be an object.");
  }

  if (value.kind !== "ignitionrl.neural-learner-checkpoint") {
    throw new Error(`[IgnitionRL] Unsupported neural checkpoint kind: ${String(value.kind)}.`);
  }

  if (value.schemaVersion !== NEURAL_CHECKPOINT_SCHEMA_VERSION) {
    throw new Error(`[IgnitionRL] Unsupported neural checkpoint schema version: ${String(value.schemaVersion)}.`);
  }

  if (typeof value.algorithm !== "string" || value.algorithm.trim().length === 0) {
    throw new Error("[IgnitionRL] Neural checkpoint algorithm must be a non-empty string.");
  }

  if (typeof value.envId !== "string" || value.envId.trim().length === 0) {
    throw new Error("[IgnitionRL] Neural checkpoint envId must be a non-empty string.");
  }

  if (!isRecord(value.payload)) {
    throw new Error("[IgnitionRL] Neural checkpoint payload must be a JSON object.");
  }

  validateMetricValues(recordOfNumbers(value.metrics, "Neural checkpoint metrics"));

  if (typeof value.createdAt !== "string" || value.createdAt.trim().length === 0) {
    throw new Error("[IgnitionRL] Neural checkpoint createdAt must be a non-empty string.");
  }

  // Parsed unconditionally: without these checks a contract-less call would
  // narrow to NeuralCheckpointEnvelope while these fields are missing or malformed.
  const actionSpace = neuralActionSpaceFromUnknown(value.actionSpace);
  const observation = neuralObservationSpaceFromUnknown(value.observation);

  if (!isRecord(value.updateCadence)) {
    throw new Error("[IgnitionRL] Neural checkpoint updateCadence must be an object.");
  }
  // The cast only satisfies the parameter type; validateUpdateCadence performs
  // the runtime checks and rejects unknown cadence types.
  validateUpdateCadence(value.updateCadence as NeuralUpdateCadence);

  if (contract === undefined) {
    return;
  }

  if (value.algorithm !== contract.algorithm) {
    throw new Error(
      `[IgnitionRL] Neural checkpoint algorithm ${value.algorithm} does not match adapter ${contract.algorithm}.`,
    );
  }

  if (value.envId !== contract.envId) {
    throw new Error(
      `[IgnitionRL] Neural checkpoint envId ${value.envId} does not match environment ${contract.envId}.`,
    );
  }

  const mismatch = describeActionSpaceMismatch(actionSpace, contract.actionSpace);
  if (mismatch !== undefined) {
    throw new Error(
      `[IgnitionRL] Neural checkpoint action space does not match adapter contract. ${mismatch}`,
    );
  }

  if (observation.size !== contract.observation.size) {
    throw new Error("[IgnitionRL] Neural checkpoint observation size does not match adapter contract.");
  }
}

/**
 * Explains how a checkpoint action space differs from the contract's, or
 * returns `undefined` when they are compatible.
 *
 * Kind and size are compared first. Discrete spaces then compare `n`,
 * multi-discrete spaces `nvec`, and continuous spaces `shape`. Discrete spaces
 * always have size 1, so the size check alone cannot tell them apart.
 */
function describeActionSpaceMismatch(
  actual: NeuralActionSpace,
  expected: NeuralActionSpace,
): string | undefined {
  if (actual.type !== expected.type) {
    return `Checkpoint type ${actual.type} differs from contract type ${expected.type}.`;
  }

  if (actual.size !== expected.size) {
    return `Checkpoint size ${actual.size} differs from contract size ${expected.size}.`;
  }

  if (actual.type === "discrete" && expected.type === "discrete" && actual.n !== expected.n) {
    return `Checkpoint action count ${actual.n} differs from contract action count ${expected.n}.`;
  }

  if (
    actual.type === "multi-discrete"
    && expected.type === "multi-discrete"
    && !sameNumbers(actual.nvec, expected.nvec)
  ) {
    return `Checkpoint nvec [${actual.nvec.join(", ")}] differs from contract nvec [${expected.nvec.join(", ")}].`;
  }

  if (
    actual.type === "continuous"
    && expected.type === "continuous"
    && !sameNumbers(actual.shape, expected.shape)
  ) {
    return `Checkpoint shape [${actual.shape.join(", ")}] differs from contract shape [${expected.shape.join(", ")}].`;
  }

  return undefined;
}

/** True when both arrays have the same length and pairwise identical entries. */
function sameNumbers(left: readonly number[], right: readonly number[]): boolean {
  return left.length === right.length && left.every((entry, index) => entry === right[index]);
}

/**
 * Returns the default metric specs for an algorithm.
 *
 * Every algorithm gets `episodeReward`, `episodeLength` and `successRate`.
 * `dqn`, `ppo` and `sac` append their own update-scoped metrics; any other id
 * gets the common three only.
 *
 * @returns A new array on every call.
 */
export function defaultMetricSpecs(algorithm: NeuralLearnerAlgorithm): readonly NeuralMetricSpec[] {
  const common: NeuralMetricSpec[] = [
    {
      name: "episodeReward",
      scope: "episode",
      direction: "maximize",
      reducer: "mean",
      description: "Episode total reward emitted by the environment loop.",
    },
    {
      name: "episodeLength",
      scope: "episode",
      direction: "minimize",
      reducer: "mean",
      unit: "steps",
    },
    {
      name: "successRate",
      scope: "run",
      direction: "maximize",
      reducer: "mean",
    },
  ];

  switch (algorithm) {
    case "dqn":
      return [
        ...common,
        { name: "tdLoss", scope: "update", direction: "minimize", reducer: "mean" },
        { name: "epsilon", scope: "update", direction: "none", reducer: "last" },
        { name: "replayBufferSize", scope: "update", direction: "none", reducer: "last" },
      ];
    case "ppo":
      return [
        ...common,
        { name: "policyLoss", scope: "update", direction: "minimize", reducer: "mean" },
        { name: "valueLoss", scope: "update", direction: "minimize", reducer: "mean" },
        { name: "entropy", scope: "update", direction: "none", reducer: "mean" },
        { name: "approxKl", scope: "update", direction: "minimize", reducer: "mean" },
      ];
    case "sac":
      return [
        ...common,
        { name: "actorLoss", scope: "update", direction: "minimize", reducer: "mean" },
        { name: "criticLoss", scope: "update", direction: "minimize", reducer: "mean" },
        { name: "temperature", scope: "update", direction: "none", reducer: "last" },
      ];
    default:
      return common;
  }
}
/**
 * Normalizes a discrete action spec. The action count is `values.length` when
 * the spec lists `values`, otherwise `spec.n`.
 *
 * @throws Error when the resulting count is missing, non-integer or not positive.
 */
function neuralDiscreteActionSpace(spec: DiscreteActionSpec): NeuralDiscreteActionSpace {
  const values = "values" in spec ? spec.values : undefined;
  const n = values === undefined ? spec.n : values.length;

  if (n === undefined || !Number.isInteger(n) || n <= 0) {
    throw new Error("[IgnitionRL] Discrete neural action space must have a positive action count.");
  }

  return {
    type: "discrete",
    size: 1,
    n,
    // Arrays are copied so the contract does not alias the environment spec.
    ...(values !== undefined ? { values: [...values] } : {}),
    ...(spec.labels !== undefined ? { labels: [...spec.labels] } : {}),
  };
}

/**
 * Normalizes a continuous action spec. Scalar or missing bounds are expanded
 * to one entry per action component; missing bounds default to -1 and 1.
 *
 * @throws Error when array bounds do not have one entry per action component.
 */
function neuralContinuousActionSpace(spec: ContinuousActionSpec): NeuralContinuousActionSpace {
  const size = actionSize(spec);

  return {
    type: "continuous",
    size,
    shape: [...spec.shape],
    low: normalizeBounds(spec.low, size, -1),
    high: normalizeBounds(spec.high, size, 1),
    ...(spec.labels !== undefined ? { labels: [...spec.labels] } : {}),
  };
}

/** Normalizes a multi-discrete action spec, copying `nvec` and `labels`. */
function neuralMultiDiscreteActionSpace(spec: MultiDiscreteActionSpec): NeuralMultiDiscreteActionSpace {
  return {
    type: "multi-discrete",
    size: actionSize(spec),
    nvec: [...spec.nvec],
    ...(spec.labels !== undefined ? { labels: [...spec.labels] } : {}),
  };
}

/**
 * Expands a bound into an array of length `size`.
 *
 * @param bound Scalar (repeated), array (copied) or `undefined` (use `fallback`).
 * @param size Number of action components.
 * @param fallback Value used for every component when `bound` is `undefined`.
 * @throws Error when an array bound does not contain exactly `size` entries.
 */
function normalizeBounds(
  bound: number | readonly number[] | undefined,
  size: number,
  fallback: number,
): readonly number[] {
  if (bound === undefined || typeof bound === "number") {
    const fill = bound ?? fallback;
    return Array.from({ length: size }, () => fill);
  }

  // A bound array of the wrong length would give `low`/`high` a different
  // length from `size`, which makes the action space self-inconsistent.
  if (bound.length !== size) {
    throw new Error(
      `[IgnitionRL] Continuous action bounds must have ${size} entries, got ${bound.length}.`,
    );
  }

  return [...bound];
}

/**
 * Resolves the action-space kinds an algorithm supports.
 *
 * An explicit list wins and is validated; otherwise the built-in profile is
 * used. Either way a fresh array is returned, so the result never aliases the
 * caller's list or the module-level profile table.
 *
 * @throws Error when a non-built-in algorithm supplies no explicit list.
 */
function supportedActionSpacesFor(
  algorithm: string,
  explicitSupport?: readonly NeuralActionSpaceKind[],
): readonly NeuralActionSpaceKind[] {
  if (explicitSupport !== undefined) {
    validateSupportedActionSpaces(explicitSupport, algorithm);
    return [...explicitSupport];
  }

  if (isBuiltInAlgorithm(algorithm)) {
    return [...BUILT_IN_ACTION_SUPPORT[algorithm]];
  }

  throw new Error(
    `[IgnitionRL] Custom neural learner adapter ${algorithm} must declare supported action spaces.`,
  );
}

/**
 * Default update cadence: per-step for `dqn` and `sac`, per-rollout for `ppo`,
 * per-episode for any other algorithm id.
 */
function defaultUpdateCadence(algorithm: string): NeuralUpdateCadence {
  switch (algorithm) {
    case "dqn":
    case "sac":
      return {
        type: "step",
        everySteps: 1,
        warmupSteps: 1_000,
        batchSize: 64,
      };
    case "ppo":
      return {
        type: "rollout",
        rolloutSteps: 2_048,
        epochs: 10,
        minibatches: 32,
      };
    default:
      return {
        type: "episode",
        everyEpisodes: 1,
      };
  }
}

/**
 * Validates the numeric fields of an update cadence.
 *
 * @throws Error when a required field is not a positive integer, an optional
 *   field is out of range, or `type` is not a known cadence.
 */
function validateUpdateCadence(cadence: NeuralUpdateCadence): void {
  switch (cadence.type) {
    case "step":
      assertPositiveInteger(cadence.everySteps, "updateCadence.everySteps");
      if (cadence.warmupSteps !== undefined) {
        assertNonNegativeInteger(cadence.warmupSteps, "updateCadence.warmupSteps");
      }
      if (cadence.batchSize !== undefined) {
        assertPositiveInteger(cadence.batchSize, "updateCadence.batchSize");
      }
      return;
    case "episode":
      assertPositiveInteger(cadence.everyEpisodes, "updateCadence.everyEpisodes");
      if (cadence.batchSize !== undefined) {
        assertPositiveInteger(cadence.batchSize, "updateCadence.batchSize");
      }
      return;
    case "rollout":
      assertPositiveInteger(cadence.rolloutSteps, "updateCadence.rolloutSteps");
      if (cadence.epochs !== undefined) {
        assertPositiveInteger(cadence.epochs, "updateCadence.epochs");
      }
      if (cadence.minibatches !== undefined) {
        assertPositiveInteger(cadence.minibatches, "updateCadence.minibatches");
      }
      return;
    default: {
      // Reachable only for values that bypassed the type system (e.g. parsed JSON).
      const unknown = cadence as { readonly type?: unknown };
      throw new Error(`[IgnitionRL] Unsupported neural update cadence: ${String(unknown.type)}.`);
    }
  }
}
/**
 * Validates a metric spec list.
 *
 * @throws Error when the list is empty, a name is blank, or a name repeats.
 */
function validateMetricSpecs(metrics: readonly NeuralMetricSpec[]): void {
  if (metrics.length === 0) {
    throw new Error("[IgnitionRL] Neural learner metrics must include at least one metric.");
  }

  const names = new Set<string>();

  for (const metric of metrics) {
    if (metric.name.trim().length === 0) {
      throw new Error("[IgnitionRL] Neural learner metric names must be non-empty.");
    }

    if (names.has(metric.name)) {
      throw new Error(`[IgnitionRL] Duplicate neural learner metric name: ${metric.name}.`);
    }

    names.add(metric.name);
  }
}

/**
 * Checks that every metric value is finite.
 *
 * @returns A shallow copy of `metrics`.
 * @throws Error naming the first non-finite metric.
 */
function validateMetricValues(
  metrics: Readonly<Record<string, number>>,
): Readonly<Record<string, number>> {
  for (const [name, value] of Object.entries(metrics)) {
    if (!Number.isFinite(value)) {
      throw new Error(`[IgnitionRL] Neural checkpoint metric ${name} must be finite.`);
    }
  }

  return { ...metrics };
}

/**
 * Validates an explicitly declared list of supported action-space kinds.
 *
 * @throws Error when the list is empty or contains an unknown kind.
 */
function validateSupportedActionSpaces(
  values: readonly NeuralActionSpaceKind[],
  algorithm: string,
): void {
  if (values.length === 0) {
    throw new Error(`[IgnitionRL] Neural learner adapter ${algorithm} must support at least one action space.`);
  }

  // Checked at runtime because the list may come from untyped callers.
  const valid = new Set<string>(["discrete", "continuous", "multi-discrete"]);

  for (const value of values) {
    if (!valid.has(value)) {
      throw new Error(`[IgnitionRL] Unsupported neural action space: ${String(value)}.`);
    }
  }
}

/**
 * Parses an untrusted observation space.
 *
 * @returns A normalized space rebuilt from `shape[0]`; any `size` field on the
 *   input is ignored.
 * @throws Error unless the value is a float32 vector with one positive integer dimension.
 */
function neuralObservationSpaceFromUnknown(value: unknown): NeuralObservationSpace {
  if (!isRecord(value)) {
    throw new Error("[IgnitionRL] Neural checkpoint observation must be an object.");
  }

  const shape = value.shape;
  const size: unknown = Array.isArray(shape) && shape.length === 1 ? shape[0] : undefined;

  if (
    value.type !== "vector"
    || value.dtype !== "float32"
    || typeof size !== "number"
    || !Number.isInteger(size)
    || size <= 0
  ) {
    throw new Error("[IgnitionRL] Neural checkpoint observation spec is invalid.");
  }

  return {
    type: "vector",
    shape: [size],
    dtype: "float32",
    size,
  };
}

/**
 * Parses an untrusted action space into its normalized form.
 *
 * Only structural fields are kept: `values` and `labels` are dropped.
 *
 * @throws Error when `type` is unknown, a count is not a positive integer, or
 *   continuous bounds do not have one entry per action component.
 */
function neuralActionSpaceFromUnknown(value: unknown): NeuralActionSpace {
  if (!isRecord(value)) {
    throw new Error("[IgnitionRL] Neural checkpoint action space must be an object.");
  }

  if (value.type === "discrete") {
    const n = numberFromUnknown(value.n);

    if (!Number.isInteger(n) || n <= 0) {
      throw new Error("[IgnitionRL] Neural checkpoint discrete action space is invalid.");
    }

    return {
      type: "discrete",
      size: 1,
      n,
    };
  }

  if (value.type === "continuous") {
    const size = numberFromUnknown(value.size);
    const shape = arrayOfNumbers(value.shape, "continuous action shape");
    const low = arrayOfNumbers(value.low, "continuous action low");
    const high = arrayOfNumbers(value.high, "continuous action high");

    if (!Number.isInteger(size) || size <= 0) {
      throw new Error("[IgnitionRL] Neural checkpoint continuous action size is invalid.");
    }

    if (shape.some((dimension) => !Number.isInteger(dimension) || dimension <= 0)) {
      throw new Error("[IgnitionRL] Neural checkpoint continuous action shape is invalid.");
    }

    // Bounds are per component, so a length mismatch means the envelope is corrupt.
    if (low.length !== size || high.length !== size) {
      throw new Error("[IgnitionRL] Neural checkpoint continuous action bounds must match action size.");
    }

    return {
      type: "continuous",
      size,
      shape,
      low,
      high,
    };
  }

  if (value.type === "multi-discrete") {
    const nvec = arrayOfNumbers(value.nvec, "multi-discrete nvec");

    // Each entry is a choice count, so it must be a positive integer, and an
    // empty vector would describe an action with no components.
    if (nvec.length === 0 || nvec.some((count) => !Number.isInteger(count) || count <= 0)) {
      throw new Error("[IgnitionRL] Neural checkpoint multi-discrete action space is invalid.");
    }

    return {
      type: "multi-discrete",
      size: nvec.length,
      nvec,
    };
  }

  throw new Error(`[IgnitionRL] Unsupported neural checkpoint action space: ${String(value.type)}.`);
}

/**
 * Parses an untrusted value as a string-keyed record of finite numbers.
 *
 * @param label Prefix used in error messages.
 * @returns A new record containing the validated entries.
 */
function recordOfNumbers(value: unknown, label: string): Readonly<Record<string, number>> {
  if (!isRecord(value)) {
    throw new Error(`[IgnitionRL] ${label} must be an object.`);
  }

  const output: Record<string, number> = {};

  for (const [name, metric] of Object.entries(value)) {
    if (typeof metric !== "number" || !Number.isFinite(metric)) {
      throw new Error(`[IgnitionRL] ${label} ${name} must be finite.`);
    }

    output[name] = metric;
  }

  return output;
}

/** Parses an untrusted value as an array of finite numbers and returns a copy. */
function arrayOfNumbers(value: unknown, label: string): readonly number[] {
  if (!Array.isArray(value) || value.some((entry) => typeof entry !== "number" || !Number.isFinite(entry))) {
    throw new Error(`[IgnitionRL] Neural checkpoint ${label} must be a finite number array.`);
  }

  return [...value];
}

/** Returns `value` when it is a finite number, otherwise `NaN`. */
function numberFromUnknown(value: unknown): number {
  return typeof value === "number" && Number.isFinite(value) ? value : Number.NaN;
}

/** Throws unless `value` is an integer greater than zero. */
function assertPositiveInteger(value: number, label: string): void {
  if (!Number.isInteger(value) || value <= 0) {
    throw new Error(`[IgnitionRL] ${label} must be a positive integer.`);
  }
}

/** Throws unless `value` is an integer greater than or equal to zero. */
function assertNonNegativeInteger(value: number, label: string): void {
  if (!Number.isInteger(value) || value < 0) {
    throw new Error(`[IgnitionRL] ${label} must be a non-negative integer.`);
  }
}

/** Type guard for the algorithm ids that have a built-in support profile. */
function isBuiltInAlgorithm(value: string): value is BuiltInNeuralLearnerAlgorithm {
  return value === "dqn" || value === "ppo" || value === "sac";
}

/** Type guard for non-null, non-array objects. */
function isRecord(value: unknown): value is Record<string, unknown> {
  return typeof value === "object" && value !== null && !Array.isArray(value);
}

/**
 * Converts a contract into a detached JSON value.
 *
 * The contract is round-tripped through `JSON.stringify`/`JSON.parse`, so the
 * result shares no references with the input and contains exactly what a JSON
 * serializer would write.
 */
export function neuralAdapterContractToJson(
  contract: NeuralLearnerAdapterContract,
): JsonValue {
  return JSON.parse(JSON.stringify(contract)) as JsonValue;
}
import { describe, expect, test } from "bun:test";
import type { EnvironmentSpec } from "@ignitionrl/core";
import {
  assertNeuralCheckpointEnvelope,
  createNeuralCheckpointEnvelope,
  defineNeuralLearnerAdapterContract,
  neuralAdapterContractToJson,
} from "../src/index.js";

// Contract-level tests: each case builds a contract from a fixture spec and
// checks the derived spaces, defaults and checkpoint validation.
describe("neural learner adapter contract", () => {
  test("describes a DQN-compatible discrete environment without changing the environment spec", () => {
    const contract = defineNeuralLearnerAdapterContract(discreteSpec(), {
      algorithm: "dqn",
      backend: {
        kind: "typescript",
        name: "debug-dqn",
      },
    });

    expect(contract.kind).toBe("ignitionrl.neural-learner-adapter-contract");
    expect(contract.envId).toBe("Target2D-v0");
    expect(contract.observation.size).toBe(4);
    // `actionSpace` is a union, so `n` is checked structurally instead of via
    // a property access that only exists on the discrete variant.
    expect(contract.actionSpace).toMatchObject({ type: "discrete", n: 4 });
    expect(contract.supportedActionSpaces).toEqual(["discrete"]);
    expect(contract.updateCadence).toMatchObject({
      type: "step",
      everySteps: 1,
      warmupSteps: 1000,
      batchSize: 64,
    });
    expect(contract.metrics.map((metric) => metric.name)).toContain("tdLoss");
    expect(neuralAdapterContractToJson(contract)).toMatchObject({
      algorithm: "dqn",
      envId: "Target2D-v0",
    });
  });

  test("supports PPO continuous and multi-discrete action spaces for future algorithms", () => {
    const continuous = defineNeuralLearnerAdapterContract(continuousSpec(), {
      algorithm: "ppo",
    });
    const multiDiscrete = defineNeuralLearnerAdapterContract(multiDiscreteSpec(), {
      algorithm: "ppo",
    });

    expect(continuous.actionSpace).toMatchObject({
      type: "continuous",
      size: 2,
      shape: [2],
      low: [-1, -1],
      high: [1, 1],
    });
    expect(continuous.updateCadence).toMatchObject({
      type: "rollout",
      rolloutSteps: 2048,
    });
    expect(multiDiscrete.actionSpace).toMatchObject({
      type: "multi-discrete",
      size: 2,
      nvec: [3, 4],
    });
  });

  test("fails unsupported action spaces with algorithm-specific messages", () => {
    expect(() =>
      defineNeuralLearnerAdapterContract(continuousSpec(), { algorithm: "dqn" })
    ).toThrow("dqn does not support continuous action spaces");

    expect(() =>
      defineNeuralLearnerAdapterContract(discreteSpec(), { algorithm: "sac" })
    ).toThrow("sac does not support discrete action spaces");

    expect(() =>
      defineNeuralLearnerAdapterContract(discreteSpec(), { algorithm: "custom-neural" })
    ).toThrow("must declare supported action spaces");
  });

  test("creates and validates JSON checkpoint envelopes against the adapter contract", () => {
    const contract = defineNeuralLearnerAdapterContract(continuousSpec(), {
      algorithm: "sac",
      backend: {
        kind: "native",
        name: "burn",
        capabilities: ["train", "checkpoint"],
      },
    });
    const envelope = createNeuralCheckpointEnvelope(contract, {
      tensors: {
        policy: "runs/sac/checkpoints/policy.safetensors",
      },
    }, {
      createdAt: "2026-05-29T00:00:00.000Z",
      metrics: {
        episodeReward: 12,
        actorLoss: 0.1,
      },
      metadata: {
        source: "unit-test",
      },
    });

    expect(envelope.kind).toBe("ignitionrl.neural-learner-checkpoint");
    expect(envelope.backend?.name).toBe("burn");
    expect(envelope.payload.tensors).toMatchObject({
      policy: "runs/sac/checkpoints/policy.safetensors",
    });
    expect(() => assertNeuralCheckpointEnvelope(envelope, contract)).not.toThrow();
  });

  test("rejects checkpoint envelopes that do not match the adapter contract", () => {
    const contract = defineNeuralLearnerAdapterContract(discreteSpec(), {
      algorithm: "dqn",
    });
    const envelope = createNeuralCheckpointEnvelope(contract, {
      qNetwork: "runs/dqn/checkpoints/q.json",
    });

    expect(() =>
      assertNeuralCheckpointEnvelope({
        ...envelope,
        envId: "OtherEnv-v0",
      }, contract)
    ).toThrow("does not match environment Target2D-v0");

    expect(() =>
      assertNeuralCheckpointEnvelope({
        ...envelope,
        metrics: {
          tdLoss: Number.NaN,
        },
      }, contract)
    ).toThrow("must be finite");
  });
});

/** Fixture: 4-dimensional observation with four labelled discrete actions. */
function discreteSpec(): EnvironmentSpec {
  return {
    id: "Target2D-v0",
    observation: {
      type: "vector",
      shape: [4],
      dtype: "float32",
    },
    actions: {
      type: "discrete",
      values: ["up", "down", "left", "right"],
      labels: ["Up", "Down", "Left", "Right"],
    },
  };
}

/** Fixture: 12-dimensional observation with a 2-component continuous action in [-1, 1]. */
function continuousSpec(): EnvironmentSpec {
  return {
    id: "DroneTarget-v0",
    observation: {
      type: "vector",
      shape: [12],
      dtype: "float32",
    },
    actions: {
      type: "continuous",
      shape: [2],
      low: -1,
      high: 1,
      labels: ["thrust", "yaw"],
    },
  };
}

/** Fixture: 6-dimensional observation with a two-component multi-discrete action. */
function multiDiscreteSpec(): EnvironmentSpec {
  return {
    id: "SwitchBoard-v0",
    observation: {
      type: "vector",
      shape: [6],
      dtype: "float32",
    },
    actions: {
      type: "multi-discrete",
      nvec: [3, 4],
    },
  };
}
undefined; acceptsTinyAction(action); + const acceptsNeuralCheckpointEnv = (_envId: "Tiny-v0") => undefined; + + acceptsNeuralCheckpointEnv(neuralCheckpoint.envId as "Tiny-v0"); learner.loadCheckpoint(checkpoint, TinyEnv.getSpec()); await continuousLearner.init(TinyContinuousEnv.getSpec(), {});