From c71409bc5b2becc0c41bd16029cfbc94381cc5a6 Mon Sep 17 00:00:00 2001 From: Maxi <76120220+MaxiLein@users.noreply.github.com> Date: Tue, 29 Jul 2025 15:37:31 +0200 Subject: [PATCH 01/48] Add entries to .gitignore for competence-matcher build and database files and models --- .gitignore | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/.gitignore b/.gitignore index 81e501ecf..a8d014595 100644 --- a/.gitignore +++ b/.gitignore @@ -60,3 +60,8 @@ gha-creds-*.json # Ignore custom claude project files CLAUDE.md + +# Matching models & DB +./src/competence-matcher/dist/ +./src/competence-matcher/src/db/dbs/ +./src/competence-matcher/src/models/ \ No newline at end of file From dc1afc67910271fc1bf6cb08bdf656c5056613ab Mon Sep 17 00:00:00 2001 From: Maxi <76120220+MaxiLein@users.noreply.github.com> Date: Tue, 29 Jul 2025 15:39:18 +0200 Subject: [PATCH 02/48] Add competence-matcher configuration files and update .gitignore --- .gitignore | 6 +- src/competence-matcher/.env | 0 src/competence-matcher/openAPI.json | 720 +++++++++++++++++++++++++++ src/competence-matcher/package.json | 40 ++ src/competence-matcher/src/config.ts | 19 + src/competence-matcher/tsconfig.json | 13 + 6 files changed, 795 insertions(+), 3 deletions(-) create mode 100644 src/competence-matcher/.env create mode 100644 src/competence-matcher/openAPI.json create mode 100644 src/competence-matcher/package.json create mode 100644 src/competence-matcher/src/config.ts create mode 100644 src/competence-matcher/tsconfig.json diff --git a/.gitignore b/.gitignore index a8d014595..d3704ec24 100644 --- a/.gitignore +++ b/.gitignore @@ -62,6 +62,6 @@ gha-creds-*.json CLAUDE.md # Matching models & DB -./src/competence-matcher/dist/ -./src/competence-matcher/src/db/dbs/ -./src/competence-matcher/src/models/ \ No newline at end of file +src/competence-matcher/dist +src/competence-matcher/src/db/dbs +src/competence-matcher/src/models \ No newline at end of file diff --git a/src/competence-matcher/.env 
b/src/competence-matcher/.env new file mode 100644 index 000000000..e69de29bb diff --git a/src/competence-matcher/openAPI.json b/src/competence-matcher/openAPI.json new file mode 100644 index 000000000..dc04d3fad --- /dev/null +++ b/src/competence-matcher/openAPI.json @@ -0,0 +1,720 @@ +{ + "openapi": "3.0.3", + "info": { + "title": "Matching Server API", + "version": "0.1.0", + "description": "API for managing resource competence lists and matching tasks to resources." + }, + "servers": [ + { + "url": "http://localhost:8501" + } + ], + "paths": { + "/": { + "get": { + "tags": [ + "Default" + ], + "summary": "Welcome endpoint", + "responses": { + "200": { + "description": "Hello World!", + "content": { + "text/plain": { + "schema": { + "type": "string" + } + } + } + } + } + } + }, + "/resource-competence-list": { + "get": { + "tags": [ + "Resources" + ], + "summary": "Get all resource lists (only if multipleDBs=true)", + "responses": { + "200": { + "description": "Array of resource list IDs", + "content": { + "application/json": { + "schema": { + "type": "array", + "items": { + "type": "string" + } + } + } + } + } + } + } + }, + "/resource-competence-list/jobs": { + "post": { + "tags": [ + "Resources" + ], + "summary": "Create a resource competence list job", + "requestBody": { + "required": true, + "content": { + "application/json": { + "schema": { + "type": "array", + "items": { + "$ref": "#/components/schemas/ResourceInput" + } + } + } + } + }, + "responses": { + "202": { + "description": "Job accepted", + "headers": { + "Location": { + "schema": { + "type": "string" + } + } + }, + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/JobResponse" + } + } + } + } + } + } + }, + "/resource-competence-list/jobs/{jobId}": { + "get": { + "tags": [ + "Resources" + ], + "summary": "Get status of a resource creation job", + "parameters": [ + { + "name": "jobId", + "in": "path", + "required": true, + "schema": { + "type": "string" + } + } + 
], + "responses": { + "202": { + "description": "Job pending/preprocessing/running", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/JobResponse" + } + } + } + }, + "201": { + "description": "Completed with competenceListId", + "headers": { + "Location": { + "schema": { + "type": "string" + }, + "description": "Complete Path to created Competence-List" + } + }, + "content": { + "application/json": { + "schema": { + "allOf": [ + { + "$ref": "#/components/schemas/JobResponse" + }, + { + "type": "object", + "properties": { + "competenceListId": { + "type": "string" + } + }, + "required": [ + "competenceListId" + ] + } + ] + } + } + } + }, + "500": { + "description": "Job failed", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/JobResponse" + } + } + } + } + } + } + }, + "/resource-competence-list/{competenceListId}": { + "get": { + "tags": [ + "Resources" + ], + "summary": "Get a specific resource competence list", + "parameters": [ + { + "name": "competenceListId", + "in": "path", + "required": true, + "schema": { + "type": "string" + } + } + ], + "responses": { + "200": { + "description": "Resource list details", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/ResourceList" + } + } + } + }, + "400": { + "description": "Missing resourceListId" + }, + "404": { + "description": "Not found" + } + } + } + }, + "/matching-task-to-resource/jobs": { + "post": { + "tags": [ + "Matching" + ], + "summary": "Create a matching job", + "description": "Start a matching job by providing either an existing competenceListId or an inline competenceList.", + "requestBody": { + "required": true, + "content": { + "application/json": { + "schema": { + "oneOf": [ + { + "$ref": "#/components/schemas/MatchByListIdRequest" + }, + { + "$ref": "#/components/schemas/MatchByListRequest" + } + ] + }, + "examples": { + "Match existing list": { + "summary": "Use an existing list ID", + 
"value": { + "competenceListId": "123e4567-e89b-12d3-a456-426614174000", + "tasks": [ + { + "taskId": "task1", + "name": "Task 1", + "description": "This is what we currently use for matching", + "executionInstructions": "Some execution instructions", + "requiredCompetencies": [ + { + "competenceId": "comp1", + "name": "Competence 1", + "description": "Description of competence 1", + "externalQualificationNeeded": false, + "renewTime": 30, + "proficiencyLevel": "advanced", + "qualificationDates": [ + "2025-07-01" + ], + "lastUsages": [ + "2025-07-10T14:30:00Z" + ] + } + ] + } + ] + } + }, + "Match inline list": { + "summary": "Pass a full competenceList inline", + "value": { + "competenceList": [ + { + "resourceId": "string", + "competencies": [ + { + "competenceId": "string", + "name": "string", + "description": "string", + "externalQualificationNeeded": true, + "renewTime": 0, + "proficiencyLevel": "string", + "qualificationDates": [ + "2025-07-15" + ], + "lastUsages": [ + "2025-07-15T10:37:09.695Z" + ] + } + ] + } + ], + "tasks": [ + { + "taskId": "task1", + "name": "Task 1", + "description": "This is what we currently use for matching", + "executionInstructions": "Some execution instructions", + "requiredCompetencies": [ + { + "competenceId": "comp1", + "name": "Competence 1", + "description": "Description of competence 1", + "externalQualificationNeeded": false, + "renewTime": 30, + "proficiencyLevel": "advanced", + "qualificationDates": [ + "2025-07-01" + ], + "lastUsages": [ + "2025-07-10T14:30:00Z" + ] + } + ] + } + ] + } + } + } + } + } + }, + "responses": { + "202": { + "description": "Match job accepted", + "headers": { + "Location": { + "schema": { + "type": "string" + }, + "description": "Complete Path to created Competence-List" + } + }, + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/JobResponse" + } + } + } + } + } + } + }, + "/matching-task-to-resource/jobs/{jobId}": { + "get": { + "tags": [ + "Matching" + ], + 
"summary": "Get match job results", + "parameters": [ + { + "name": "jobId", + "in": "path", + "required": true, + "schema": { + "type": "string" + } + }, + { + "name": "rankBy", + "in": "query", + "required": false, + "schema": { + "type": "string", + "enum": [ + "avgFit", + "bestFit" + ] + }, + "description": "Optional ranking method: 'avgFit' or 'bestFit'" + } + ], + "responses": { + "202": { + "description": "Job pending/running", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/JobResponse" + } + } + } + }, + "200": { + "description": "Match results", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/GroupedMatchResults" + } + } + } + }, + "404": { + "description": "Job not found" + }, + "500": { + "description": "Job failed" + } + } + } + } + }, + "components": { + "schemas": { + "ResourceInput": { + "type": "object", + "required": [ + "resourceId", + "competencies" + ], + "properties": { + "resourceId": { + "type": "string" + }, + "competencies": { + "type": "array", + "items": { + "$ref": "#/components/schemas/CompetenceInput" + } + } + } + }, + "CompetenceInput": { + "type": "object", + "required": [ + "competenceId" + ], + "properties": { + "competenceId": { + "type": "string" + }, + "name": { + "type": "string" + }, + "description": { + "type": "string" + }, + "externalQualificationNeeded": { + "type": "boolean" + }, + "renewTime": { + "type": "number" + }, + "proficiencyLevel": { + "type": "string" + }, + "qualificationDates": { + "type": "array", + "items": { + "type": "string", + "format": "date" + } + }, + "lastUsages": { + "type": "array", + "items": { + "type": "string", + "format": "date-time" + } + } + } + }, + "ResourceList": { + "type": "object", + "required": [ + "competenceListId", + "resources" + ], + "properties": { + "competenceListId": { + "type": "string" + }, + "resources": { + "type": "array", + "items": { + "type": "object", + "required": [ + "resourceId", + 
"competencies" + ], + "properties": { + "resourceId": { + "type": "string" + }, + "competencies": { + "type": "array", + "items": { + "$ref": "#/components/schemas/CompetenceInput" + } + } + } + } + } + } + }, + "JobResponse": { + "type": "object", + "required": [ + "jobId", + "status" + ], + "properties": { + "jobId": { + "type": "string" + }, + "status": { + "type": "string" + } + } + }, + "MatchByListIdRequest": { + "type": "object", + "required": [ + "competenceListId", + "tasks" + ], + "properties": { + "competenceListId": { + "type": "string" + }, + "tasks": { + "type": "array", + "items": { + "$ref": "#/components/schemas/MatchingTask" + } + } + } + }, + "MatchByListRequest": { + "type": "object", + "required": [ + "competenceList", + "tasks" + ], + "properties": { + "competenceList": { + "type": "array", + "items": { + "$ref": "#/components/schemas/ResourceInput" + } + }, + "tasks": { + "type": "array", + "items": { + "$ref": "#/components/schemas/MatchingTask" + } + } + } + }, + "MatchingTask": { + "type": "object", + "required": [ + "taskId" + ], + "properties": { + "taskId": { + "type": "string" + }, + "name": { + "type": "string" + }, + "description": { + "type": "string" + }, + "executionInstructions": { + "type": "string" + }, + "requiredCompetencies": { + "type": "array", + "items": { + "oneOf": [ + { + "type": "string" + }, + { + "$ref": "#/components/schemas/CompetenceInput" + } + ] + } + } + } + }, + "GroupedMatchResults": { + "type": "object", + "required": [ + "taskOverview", + "resourceRanking" + ], + "properties": { + "tasks": { + "$ref": "#/components/schemas/TaskOverview" + }, + "resourceRanking": { + "$ref": "#/components/schemas/ResourceRanking" + } + } + }, + "TaskOverview": { + "type": "array", + "items": { + "type": "object", + "required": [ + "taskId", + "taskText" + ], + "properties": { + "taskId": { + "type": "string" + }, + "taskText": { + "type": "string" + } + } + } + }, + "ResourceRanking": { + "type": "array", + "items": { + 
"type": "object", + "required": [ + "resourceId", + "taskMatchings", + "avgTaskMatchProbability" + ], + "properties": { + "resourceId": { + "type": "string" + }, + "taskMatchings": { + "type": "array", + "items": { + "type": "object", + "required": [ + "taskId", + "taskText", + "competenceMatchings", + "maxMatchProbability" + ], + "properties": { + "taskId": { + "type": "string" + }, + "competenceMatchings": { + "type": "array", + "items": { + "type": "object", + "required": [ + "competenceId", + "matchings", + "avgMatchProbability" + ], + "properties": { + "competenceId": { + "type": "string" + }, + "matchings": { + "type": "array", + "items": { + "$ref": "#/components/schemas/MatchDetail" + } + }, + "avgMatchProbability": { + "type": "number" + }, + "avgBestFitTaskMatchProbability": { + "type": "number" + } + } + } + }, + "maxMatchProbability": { + "type": "number" + }, + "maxBestFitMatchProbability": { + "type": "number" + } + } + } + }, + "avgTaskMatchProbability": { + "type": "number" + }, + "avgBestFitTaskMatchProbability": { + "type": "number" + }, + "contradicting": { + "type": "boolean" + } + } + } + }, + "MatchDetail": { + "type": "object", + "required": [ + "text", + "type", + "matchProbability" + ], + "properties": { + "text": { + "type": "string" + }, + "type": { + "type": "string", + "enum": [ + "name", + "description", + "proficiencyLevel" + ] + }, + "matchProbability": { + "type": "number" + }, + "alignment": { + "type": "string", + "enum": [ + "aligning", + "neutral", + "contradicting" + ] + }, + "reason": { + "type": "string" + } + } + } + } + } +} \ No newline at end of file diff --git a/src/competence-matcher/package.json b/src/competence-matcher/package.json new file mode 100644 index 000000000..032b82a8d --- /dev/null +++ b/src/competence-matcher/package.json @@ -0,0 +1,40 @@ +{ + "name": "competence-matcher", + "version": "0.1.0", + "description": "Matching microservice that allows to allows to define and match on data criteria", + "main": 
"dist/server.js", + "scripts": { + "dev": "ts-node-dev --respawn --transpile-only src/server.ts", + "build": "tsc", + "run-production": "node dist/server.js" + }, + "repository": { + "type": "git", + "url": "git+https://github.com/PROCEED-Labs/proceed.git" + }, + "keywords": [ + "embedding", + "matching" + ], + "author": "PROCEED Project", + "license": "MIT", + "bugs": { + "url": "https://github.com/PROCEED-Labs/proceed/issues" + }, + "homepage": "https://github.com/PROCEED-Labs/proceed#readme", + "dependencies": { + "@huggingface/transformers": "^3.5.2", + "express": "^5.1.0", + "ollama": "^0.5.16", + "sqlite-vec": "^0.1.7-alpha.2" + }, + "devDependencies": { + "@types/express": "^5.0.2", + "@types/node": "^22.15.30", + "ts-node-dev": "^2.0.0", + "typescript": "^5.8.3" + }, + "engines": { + "node": ">=23.5.0" + } +} \ No newline at end of file diff --git a/src/competence-matcher/src/config.ts b/src/competence-matcher/src/config.ts new file mode 100644 index 000000000..02a8a7001 --- /dev/null +++ b/src/competence-matcher/src/config.ts @@ -0,0 +1,19 @@ +import * as os from 'node:os'; + +export const config = { + dbPath: process.env.DB_PATH || 'src/db/dbs/', + embeddingModel: process.env.EMBEDDING_MODEL || 'onnx-community/Qwen3-Embedding-0.6B-ONNX', + embeddingDim: parseInt(process.env.EMBEDDING_DIM || '1024', 10), + nliModel: process.env.NLI_MODEL || './src/models/roberta_mnli_onnx', + modelCache: process.env.MODEL_CACHE || 'src/models/', + useGPU: process.env.USE_GPU === 'true' || false, + port: parseInt(process.env.PORT || '8501', 10), + multipleDBs: process.env.MULTIPLE_DBS === 'true' || false, + ollamaPath: process.env.OLLAMA_PATH || 'http://localhost:11434', + ollamaBatchSize: parseInt(process.env.OLLAMA_BATCH_SIZE || '5', 10), + splittingModel: process.env.SPLITTING_MODEL || 'llama3.2', + reasonModel: process.env.REASON_MODEL || 'llama3.2', + splittingSymbol: process.env.SPLITTING_SYMBOL || 'SPLITTING_SYMBOL', + maxWorkerThreads: 
parseInt(process.env.NUMBER_OF_THREADS || String(os.cpus().length - 1), 10), // -1 for main thread + maxJobTime: parseInt(process.env.MAX_JOB_TIME || '600', 10) * 1_000, // converted from seconds to milliseconds +}; diff --git a/src/competence-matcher/tsconfig.json b/src/competence-matcher/tsconfig.json new file mode 100644 index 000000000..c612291b4 --- /dev/null +++ b/src/competence-matcher/tsconfig.json @@ -0,0 +1,13 @@ +{ + "compilerOptions": { + "lib": ["esnext"], + "target": "ES2020", + "module": "commonjs", + "rootDir": "src", + "outDir": "dist", + "strict": true, + "esModuleInterop": true, + "moduleResolution": "node" + }, + "include": ["src"] +} From 73fbd6688334b74840246ccae3333a744a87fcef Mon Sep 17 00:00:00 2001 From: Maxi <76120220+MaxiLein@users.noreply.github.com> Date: Tue, 29 Jul 2025 15:39:37 +0200 Subject: [PATCH 03/48] Implement DBManager and VectorDataBase classes for database management and operations --- src/competence-matcher/src/db/db-manager.ts | 162 ++++ src/competence-matcher/src/db/db.ts | 868 ++++++++++++++++++ .../src/middleware/db-locator.ts | 22 + 3 files changed, 1052 insertions(+) create mode 100644 src/competence-matcher/src/db/db-manager.ts create mode 100644 src/competence-matcher/src/db/db.ts create mode 100644 src/competence-matcher/src/middleware/db-locator.ts diff --git a/src/competence-matcher/src/db/db-manager.ts b/src/competence-matcher/src/db/db-manager.ts new file mode 100644 index 000000000..d5ab99d4f --- /dev/null +++ b/src/competence-matcher/src/db/db-manager.ts @@ -0,0 +1,162 @@ +import * as path from 'node:path'; +import * as fs from 'node:fs'; +import VectorDataBase from './db'; +import { config } from '../config'; + +const { dbPath: rawDbPath, embeddingDim } = config; + +/** + * DBManager: Singleton that manages multiple VectorDataBase instances keyed by name. 
+ * + */ +class DBManager { + private static managerInstance: DBManager; + private dbInstances = new Map(); + private static activeDB: VectorDataBase | null = null; + private dbPath: string; + private embeddingDim: number; + + private constructor() { + this.embeddingDim = embeddingDim; + + // Resolve absolute path for storage directory + this.dbPath = path.resolve(rawDbPath); + // Ensure directory exists + if (!fs.existsSync(this.dbPath)) { + fs.mkdirSync(this.dbPath, { recursive: true }); + } + // Load existing databases + this.loadSavedDBs(); + } + + /** + * Retrieve the singleton DBManager instance, initialising if necessary. + * @returns DBManager singleton + */ + public static getInstance(): DBManager { + if (!DBManager.managerInstance) { + DBManager.managerInstance = new DBManager(); + } + return DBManager.managerInstance; + } + + /** + * Initialise the DBManager and load any existing databases. + */ + private loadSavedDBs(): void { + // Load existing databases from the storage directory + const files = fs.readdirSync(this.dbPath); + files.forEach((file) => { + if (file.endsWith('.db')) { + const dbName = path.basename(file, '.db'); + this.addDBInstance(dbName); + } + }); + } + + /** + * Normalise a database name by stripping any extension and enforcing `.db`. + * @param dbName Name provided; may include extension. + * @returns Normalised filename ending with `.db`. + */ + private normaliseDBName(dbName: string): string { + const base = path.basename(dbName, path.extname(dbName)); + return `${base}.db`; + } + + /** + * Resolve full absolute path to the DB file under storage directory. + * @param dbName Name provided by user; normalised to `.db` and joined with storage dir. + * @returns Absolute file path for the database. 
+ */ + private resolveDbPath(dbName: string): string { + const normalisedDBName = this.normaliseDBName(dbName); + return path.join(this.dbPath, normalisedDBName); + } + + /** + * Internal: create and cache a new VectorDataBase instance for the given name. + * Uses resolveDbPath to obtain the absolute file path. + * @param dbName Name provided by user; normalised internally. + * @returns Newly created VectorDataBase instance. + */ + private addDBInstance(dbName: string): VectorDataBase { + const normalisedDBName = this.normaliseDBName(dbName); + const filePath = this.resolveDbPath(normalisedDBName); + const db = new VectorDataBase({ filePath, embeddingDim: this.embeddingDim }); + this.dbInstances.set(normalisedDBName, db); + return db; + } + + /** + * Get the currently active VectorDataBase instance, if set via setActiveDB. + * @returns Active VectorDataBase or null if none is set. + */ + public static getActiveDB(): VectorDataBase | null { + return DBManager.activeDB; + } + + /** + * Set the active database by name. Creates the instance if it does not exist. + * @param dbName Name of the database (without extension or with any extension). + */ + public static setActiveDB(dbName: string): void { + const manager = DBManager.getInstance(); + const normalisedDBName = manager.normaliseDBName(dbName); + if (!manager.dbInstances.has(normalisedDBName)) { + manager.addDBInstance(normalisedDBName); + } + DBManager.activeDB = manager.dbInstances.get(normalisedDBName)!; + } + + /** + * Retrieve (or create) the VectorDataBase instance for given name. + * @param dbName Name of the database (without extension or with any extension). + * @returns VectorDataBase instance corresponding to the name. 
+ */ + public getDB(dbName: string): VectorDataBase { + const normalisedDBName = this.normaliseDBName(dbName); + if (this.dbInstances.has(normalisedDBName)) { + return this.dbInstances.get(normalisedDBName)!; + } + return this.addDBInstance(normalisedDBName); + } + + /** + * Close and remove the VectorDataBase instance for given name. + * @param dbName Name of the database to close. + * @returns True if instance existed and was closed; false otherwise. + */ + public closeDB(dbName: string): boolean { + const normalisedDBName = this.normaliseDBName(dbName); + const db = this.dbInstances.get(normalisedDBName); + if (db) { + db.close(); + this.dbInstances.delete(normalisedDBName); + if (DBManager.activeDB === db) { + DBManager.activeDB = null; + } + return true; + } + return false; + } + + /** + * Close and remove all managed VectorDataBase instances. + */ + public closeAllDBs(): void { + this.dbInstances.forEach((db) => db.close()); + this.dbInstances.clear(); + DBManager.activeDB = null; + } + + /** + * List the names (normalised) of all managed databases. + * @returns Array of database filenames (e.g. ['tenant1.db', 'other.db']). + */ + public listDBs(): string[] { + return Array.from(this.dbInstances.keys()); + } +} + +export default DBManager; diff --git a/src/competence-matcher/src/db/db.ts b/src/competence-matcher/src/db/db.ts new file mode 100644 index 000000000..c92974a24 --- /dev/null +++ b/src/competence-matcher/src/db/db.ts @@ -0,0 +1,868 @@ +// db.ts +import { DatabaseSync } from 'node:sqlite'; +import * as path from 'node:path'; +import * as sqliteVec from 'sqlite-vec'; +import { v4 as uuid } from 'uuid'; +import { CompetenceDBOutput, VectorDBOptions } from '../utils/types'; + +class VectorDataBase { + private db: DatabaseSync; + private embeddingDim: number; + private transactionInProgress = false; + + /** + * Opens (or creates) the SQLite DB, enables FKs, loads sqlite-vec, and sets up schema. 
+ */ + constructor(opts: VectorDBOptions) { + this.embeddingDim = opts.embeddingDim; + const dbPath = + !opts.filePath || opts.filePath === ':memory:' + ? ':memory:' + : path.isAbsolute(opts.filePath) + ? opts.filePath + : path.join(process.cwd(), opts.filePath); + + this.db = new DatabaseSync(dbPath, { allowExtension: true }); + this.db.exec(`PRAGMA foreign_keys = ON;`); + sqliteVec.load(this.db); + this.initSchema(); + } + + /** Close the database connection */ + public close(): void { + this.db.close(); + } + + /** Run a set of operations atomically (in a transaction) */ + public atomicStep(cb: () => void): void { + if (this.transactionInProgress) throw new Error('Transaction already in progress'); + this.transactionInProgress = true; + this.db.exec('BEGIN'); + try { + cb(); + this.db.exec('COMMIT'); + } catch (e) { + this.db.exec('ROLLBACK'); + throw e; + } finally { + this.transactionInProgress = false; + } + } + + /** Set up all tables, indexes and virtual tables */ + private initSchema() { + // jobs + this.db.exec(` + CREATE TABLE IF NOT EXISTS jobs ( + id TEXT PRIMARY KEY, + status TEXT NOT NULL DEFAULT 'pending', + reference_id TEXT + ); + `); + + // matches + this.db.exec(` + CREATE TABLE IF NOT EXISTS match_results ( + id TEXT PRIMARY KEY, -- UUID for this match record + job_id TEXT NOT NULL REFERENCES jobs(id) ON DELETE CASCADE, + task_id TEXT NOT NULL, -- task ID this match belongs to, + task_text TEXT NOT NULL, -- task text that was used for matching + competence_id TEXT NOT NULL, -- matched competence + resource_id TEXT NOT NULL, -- resource ID the competence belongs to + distance REAL NOT NULL, -- similarity score + text TEXT NOT NULL, -- the matched snippet + type TEXT NOT NULL, -- 'name' | 'description' | 'proficiencyLevel' + alignment TEXT NOT NULL, -- 'contradicting' | 'neutral' | 'aligning' + reason TEXT -- llm based reason for the match + ); + `); + this.db.exec(` + CREATE INDEX IF NOT EXISTS ix_match_results_job + ON match_results(job_id); + 
`); + + // resource_list + this.db.exec(` + CREATE TABLE IF NOT EXISTS resource_list ( + id TEXT PRIMARY KEY + ); + `); + + // resources (internal PK + user‐facing ID) + this.db.exec(` + CREATE TABLE IF NOT EXISTS resource ( + _rid INTEGER PRIMARY KEY AUTOINCREMENT, + resource_id TEXT NOT NULL, + list_id TEXT NOT NULL REFERENCES resource_list(id) ON DELETE CASCADE + ); + `); + this.db.exec(` + CREATE UNIQUE INDEX IF NOT EXISTS ux_resource_list_resid + ON resource(list_id, resource_id); + `); + + // competences (internal PK + user‐facing ID) + this.db.exec(` + CREATE TABLE IF NOT EXISTS competence ( + _cid INTEGER PRIMARY KEY AUTOINCREMENT, + competence_id TEXT NOT NULL, + resource_rid INTEGER NOT NULL REFERENCES resource(_rid) ON DELETE CASCADE, + competence_name TEXT, + competence_description TEXT, + external_qualification_needed BOOLEAN DEFAULT FALSE, + renew_time INTEGER, + proficiency_level TEXT, + qualification_dates TEXT, + last_usages TEXT + ); + `); + this.db.exec(` + CREATE UNIQUE INDEX IF NOT EXISTS ux_competence_rescid + ON competence(resource_rid, competence_id); + `); + + // embeddings (virtual vec0 table; explicit deletes required) + this.db.exec(` + CREATE VIRTUAL TABLE IF NOT EXISTS competence_embedding + USING vec0( + cid INTEGER NOT NULL REFERENCES competence(_cid) ON DELETE CASCADE, + text TEXT, + type TEXT, + embedding FLOAT32[${this.embeddingDim}] + ); + `); + } + + /*-------------------------------------------------------------------- + * Helper Lookups + *------------------------------------------------------------------*/ + + /** Get the internal `_rid` for a given user‐facing resourceId + listId */ + private getResourceRid(resourceId: string, listId: string): number { + const row = this.db + .prepare(`SELECT _rid FROM resource WHERE resource_id = ? 
AND list_id = ?`) + .get(resourceId, listId); + if (!row) throw new Error(`Resource '${resourceId}' not found in list '${listId}'`); + return row._rid as number; + } + + /** Get the internal `_cid` for a given user‐facing competenceId */ + private getCompetenceCidByCompetenceId( + listId: string, + resourceId: string, + competenceId: string, + ): number { + const _rid = this.getResourceRid(resourceId, listId); + const row = this.db + .prepare( + ` + SELECT _cid + FROM competence + WHERE competence_id = ? + AND resource_rid = ? + `, + ) + .get(competenceId, _rid); + if (!row) + throw new Error( + `Competence '${competenceId}' in List ${listId} in Resource ${resourceId} not found`, + ); + return row._cid as number; + } + + /*-------------------------------------------------------------------- + * Job Methods + *------------------------------------------------------------------*/ + + /** + * Create a new background‐job record. + * @param referenceId Optionally point back to a resource-list, resource, or competence ID. + * @returns the new job’s UUID. + */ + public createJob(referenceId?: string): string { + const jobId = uuid(); + this.db + .prepare(`INSERT INTO jobs(id, reference_id) VALUES (?, ?)`) + .run(jobId, referenceId ?? null); + return jobId; + } + + /** + * Change a job’s status. + * @throws if no such job exists. + */ + public updateJobStatus( + jobId: string, + status: 'pending' | 'preprocessing' | 'running' | 'completed' | 'failed', + ): void { + const result = this.db.prepare(`UPDATE jobs SET status = ? WHERE id = ?`).run(status, jobId); + if (result.changes === 0) throw new Error(`Job with id ${jobId} not found`); + } + + /** + * Look up a job’s current status and its referenceId. + * @throws if no such job exists. 
+ */ + public getJob(jobId: string): { jobId: string; status: string; referenceId?: string } { + const row = this.db + .prepare(`SELECT id, status, reference_id FROM jobs WHERE id = ?`) + .get(jobId) as { id: string; status: string; reference_id: string } | undefined; + if (!row) throw new Error(`Job with id ${jobId} not found`); + return { jobId: row.id, status: row.status, referenceId: row.reference_id ?? undefined }; + } + + /*-------------------------------------------------------------------- + * Match Methods + *------------------------------------------------------------------*/ + + /** + * Add a match result for a job, task, and competence. + * + * @param opts Options for adding a match result. + * @throws if the jobId does not exist (foreign-key violation); taskId and competenceId are not validated. + */ + public addMatchResult(opts: { + jobId: string; + taskId: string; + taskText: string; + competenceId: string; + resourceId: string; + distance: number; + text: string; + type: string; // 'name' | 'description' | 'proficiencyLevel' + alignment: string; // 'contradicting' | 'neutral' | 'aligning' + reason?: string; // optional reason for the match + }): void { + const id = uuid(); + this.db + .prepare( + ` + INSERT INTO match_results + (id, job_id, task_id, task_text, competence_id, resource_id, distance, text, type, alignment, reason) + VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?) + `, + ) + .run( + id, + opts.jobId, + opts.taskId, + opts.taskText, + opts.competenceId, + opts.resourceId, + opts.distance, + opts.text, + opts.type, + opts.alignment, + opts.reason ?? null, + ); + } + + /** + * Fetch all match results for a given jobId. + * @returns an array of match results, sorted by taskId and distance. 
+ */ + public getMatchResults(jobId: string): Array<{ + taskId: string; + taskText: string; + competenceId: string; + resourceId: string; + distance: number; + text: string; + type: string; + alignment: string; // 'contradicting' | 'neutral' | 'aligning' + reason?: string; + }> { + return this.db + .prepare( + ` + SELECT task_id, task_text, competence_id, resource_id, distance, text, type, alignment, reason + FROM match_results + WHERE job_id = ? + ORDER BY task_id, distance + `, + ) + .all(jobId) + .map((r: any) => ({ + taskId: r.task_id, + taskText: r.task_text, + competenceId: r.competence_id, + resourceId: r.resource_id, + distance: r.distance, + text: r.text, + type: r.type, + alignment: r.alignment, + reason: r.reason ?? undefined, + })); + } + + /*-------------------------------------------------------------------- + * ResourceList Methods + *------------------------------------------------------------------*/ + + /** Create a fresh, empty resource‐list and return its UUID */ + public createResourceList(): string { + const listId = uuid(); + this.db.prepare(`INSERT INTO resource_list(id) VALUES (?)`).run(listId); + return listId; + } + + /** + * Delete an entire list. The delete cascades down to its resources and competences; + * their embeddings are wiped explicitly beforehand. + */ + public deleteResourceList(listId: string): void { + this.atomicStep(() => { + this.db + .prepare( + ` + DELETE FROM competence_embedding + WHERE cid IN ( + SELECT c._cid + FROM competence c + JOIN resource r ON c.resource_rid = r._rid + WHERE r.list_id = ? + ) + `, + ) + .run(listId); + this.db.prepare(`DELETE FROM resource_list WHERE id = ?`).run(listId); + }); + } + + /** Enumerate all list IDs */ + public getAvailableResourceLists(): string[] { + return this.db + .prepare(`SELECT id FROM resource_list`) + .all() + .map((r) => (r as any).id); + } + + /** + * Fetch a list plus all its resources and each resource’s competences. + * @throws if listId doesn’t exist. 
+ */ + public getResourceList(listId: string): { + competenceListId: string; + resources: Array<{ + resourceId: string; + competencies: Array<{ + competenceId: string; + name?: string; + description?: string; + externalQualificationNeeded: boolean; + renewTime?: number; + proficiencyLevel?: string; + qualificationDates: string[]; + lastUsages: string[]; + }>; + }>; + } { + const exists = this.db.prepare(`SELECT 1 FROM resource_list WHERE id = ?`).get(listId); + if (!exists) throw new Error(`Resource list '${listId}' not found`); + + const resources = this.db + .prepare(`SELECT _rid, resource_id FROM resource WHERE list_id = ?`) + .all(listId) as Array<{ _rid: number; resource_id: string }>; + + return { + competenceListId: listId, + resources: resources.map(({ _rid, resource_id }) => { + const comps = this.db + .prepare( + ` + SELECT competence_id, competence_name, competence_description, + external_qualification_needed, renew_time, + proficiency_level, qualification_dates, last_usages + FROM competence + WHERE resource_rid = ? + `, + ) + .all(_rid) as CompetenceDBOutput[]; + return { + resourceId: resource_id, + competencies: comps.map((c) => ({ + competenceId: c.competence_id, + name: c.competence_name ?? undefined, + description: c.competence_description ?? undefined, + externalQualificationNeeded: Boolean(c.external_qualification_needed), + renewTime: c.renew_time ?? undefined, + proficiencyLevel: c.proficiency_level ?? undefined, + qualificationDates: c.qualification_dates ? JSON.parse(c.qualification_dates) : [], + lastUsages: c.last_usages ? JSON.parse(c.last_usages) : [], + })), + }; + }), + }; + } + + /*-------------------------------------------------------------------- + * Resource Methods + *------------------------------------------------------------------*/ + + /** + * Add a resource (user‐facing ID) into a list. + * Returns the user‐facing resourceId. + */ + public addResource(listId: string, resourceId?: string): string { + const rid = resourceId ?? 
uuid(); + this.db.prepare(`INSERT INTO resource(resource_id, list_id) VALUES (?, ?)`).run(rid, listId); + return rid; + } + + /** + * Move a resource from one list to another. + * @param oldListId current list + * @param resourceId user‐facing ID + * @param newListId target list + */ + public updateResource(oldListId: string, resourceId: string, newListId: string): void { + const _rid = this.getResourceRid(resourceId, oldListId); + this.db.prepare(`UPDATE resource SET list_id = ? WHERE _rid = ?`).run(newListId, _rid); + } + + /** + * Delete a single resource (and its subtree) by user‐facing ID + list. + */ + public deleteResource(listId: string, resourceId: string): void { + this.atomicStep(() => { + const _rid = this.getResourceRid(resourceId, listId); + this.db + .prepare( + ` + DELETE FROM competence_embedding + WHERE cid IN (SELECT _cid FROM competence WHERE resource_rid = ?) + `, + ) + .run(_rid); + this.db.prepare(`DELETE FROM resource WHERE _rid = ?`).run(_rid); + }); + } + + /** + * Fetch one resource + * @param listId user‐facing ID of the resource list + * @param resourceId user‐facing ID of the resource + * @returns an object with the resource’s metadata. + * @throws if not found. + */ + public getResource( + listId: string, + resourceId: string, + ): { + listId: string; + resourceId: string; + competencies: Array<{ + competenceId: string; + name?: string; + description?: string; + externalQualificationNeeded: boolean; + renewTime?: number; + proficiencyLevel?: string; + qualificationDates: string[]; + lastUsages: string[]; + }>; + } { + const row = this.db + .prepare( + ` + SELECT _rid, resource_id, list_id + FROM resource + WHERE resource_id = ? + AND list_id = ? 
+ `, + ) + .get(resourceId, listId) as + | { _rid: number; resource_id: string; list_id: string } + | undefined; + if (!row) throw new Error(`Resource '${resourceId}' in List '${listId}' not found`); + + const comps = this.db + .prepare( + ` + SELECT competence_id, competence_name, competence_description, + external_qualification_needed, renew_time, + proficiency_level, qualification_dates, last_usages + FROM competence + WHERE resource_rid = ? + `, + ) + .all(row._rid) as CompetenceDBOutput[]; + + return { + listId: row.list_id, + resourceId, + competencies: comps.map((c) => ({ + competenceId: c.competence_id, + name: c.competence_name ?? undefined, + description: c.competence_description ?? undefined, + externalQualificationNeeded: Boolean(c.external_qualification_needed), + renewTime: c.renew_time ?? undefined, + proficiencyLevel: c.proficiency_level ?? undefined, + qualificationDates: c.qualification_dates ? JSON.parse(c.qualification_dates) : [], + lastUsages: c.last_usages ? JSON.parse(c.last_usages) : [], + })), + }; + } + + /*-------------------------------------------------------------------- + * Competence Methods + *------------------------------------------------------------------*/ + + /** + * Add a competence under a given resource. + * @param listId user‐facing ID of the resource list + * @param resourceId user‐facing + * @param competence Input object (may supply its own competenceId) + * @returns the user‐facing competenceId + */ + public addCompetence( + listId: string, + resourceId: string, + competence: { + competenceId?: string; + name?: string; + description?: string; + externalQualificationNeeded?: boolean; + renewTime?: number; + proficiencyLevel?: string; + qualificationDates?: string[]; + lastUsages?: string[]; + }, + ): string { + const cidUser = competence.competenceId ?? 
uuid(); + const _rid = this.getResourceRid(resourceId, listId); + + this.db + .prepare( + ` + INSERT INTO competence + (competence_id, resource_rid, + competence_name, competence_description, + external_qualification_needed, renew_time, + proficiency_level, qualification_dates, last_usages) + VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?) + `, + ) + .run( + cidUser, + _rid, + competence.name ?? null, + competence.description ?? null, + competence.externalQualificationNeeded ? 1 : 0, + competence.renewTime ?? null, + competence.proficiencyLevel ?? null, + competence.qualificationDates ? JSON.stringify(competence.qualificationDates) : null, + competence.lastUsages ? JSON.stringify(competence.lastUsages) : null, + ); + + return cidUser; + } + + /** + * Update a competence’s metadata. + * @param listId user‐facing ID of the resource list + * @param resourceId user‐facing ID of the resource + * @param competenceId user‐facing ID of the competence to update + * @param fields object with fields to update; only those provided will be changed + * @returns nothing, but throws if the competence does not exist. + * @throws if no such competence exists on the resource. + */ + public updateCompetence( + listId: string, + resourceId: string, + competenceId: string, + fields: { + name?: string; + description?: string; + externalQualificationNeeded?: boolean; + renewTime?: number; + proficiencyLevel?: string; + qualificationDates?: string[]; + lastUsages?: string[]; + }, + ): void { + const _rid = this.getResourceRid(resourceId, listId); + const _cid = this.db + .prepare(`SELECT _cid FROM competence WHERE competence_id = ? 
AND resource_rid = ?`) + .get(competenceId, _rid)?.['_cid']; + if (!_cid) throw new Error(`Competence '${competenceId}' not on resource '${resourceId}'`); + + const sets: string[] = []; + const params: any[] = []; + if (fields.name !== undefined) { + sets.push(`competence_name = ?`); + params.push(fields.name); + } + if (fields.description !== undefined) { + sets.push(`competence_description = ?`); + params.push(fields.description); + } + if (fields.externalQualificationNeeded !== undefined) { + sets.push(`external_qualification_needed = ?`); + params.push(fields.externalQualificationNeeded ? 1 : 0); + } + if (fields.renewTime !== undefined) { + sets.push(`renew_time = ?`); + params.push(fields.renewTime); + } + if (fields.proficiencyLevel !== undefined) { + sets.push(`proficiency_level = ?`); + params.push(fields.proficiencyLevel); + } + if (fields.qualificationDates !== undefined) { + sets.push(`qualification_dates = ?`); + params.push(JSON.stringify(fields.qualificationDates)); + } + if (fields.lastUsages !== undefined) { + sets.push(`last_usages = ?`); + params.push(JSON.stringify(fields.lastUsages)); + } + + if (sets.length > 0) { + params.push(_cid); + this.db.prepare(`UPDATE competence SET ${sets.join(', ')} WHERE _cid = ?`).run(...params); + } + } + + /** + * Delete a competence from a resource. + * @param resourceId user‐facing ID + * @param listId user‐facing ID of the resource list + * @param competenceId user‐facing ID of the competence to delete + * @throws if no such competence exists on the resource. + */ + public deleteCompetence(listId: string, resourceId: string, competenceId: string): void { + this.atomicStep(() => { + const _rid = this.getResourceRid(resourceId, listId); + const _cid = this.db + .prepare(`SELECT _cid FROM competence WHERE competence_id = ? 
AND resource_rid = ?`) + .get(competenceId, _rid)?._cid; + if (!_cid) throw new Error(`Competence '${competenceId}' not on resource '${resourceId}'`); + + // explicitly delete embeddings + this.db.prepare(`DELETE FROM competence_embedding WHERE cid = ?`).run(_cid); + // then delete competence row + this.db.prepare(`DELETE FROM competence WHERE _cid = ?`).run(_cid); + }); + } + + /** + * Fetch one competence’s metadata (including listId + resourceId). + * @param listId user‐facing ID of the resource list + * @param resourceId user‐facing ID of the resource + * @param competenceId user‐facing ID of the competence to fetch + * @returns an object with the competence’s metadata. + * @throws if no such competence exists on the resource. + * @throws if the competenceId is not found on the resource. + */ + public getCompetence( + listId: string, + resourceId: string, + competenceId: string, + ): { + listId: string; + resourceId: string; + competenceId: string; + name?: string; + description?: string; + externalQualificationNeeded: boolean; + renewTime?: number; + proficiencyLevel?: string; + qualificationDates: string[]; + lastUsages: string[]; + } { + const _rid = this.getResourceRid(resourceId, listId); + const row = this.db + .prepare( + ` + SELECT c.competence_id, c.competence_name, c.competence_description, + c.external_qualification_needed, c.renew_time, + c.proficiency_level, c.qualification_dates, c.last_usages + FROM competence c + WHERE c.competence_id = ? AND c.resource_rid = ? 
+ `, + ) + .get(competenceId, _rid) as + | { + competence_id: string; + competence_name: string | null; + competence_description: string | null; + external_qualification_needed: number; + renew_time: number | null; + proficiency_level: string | null; + qualification_dates: string | null; + last_usages: string | null; + } + | undefined; + if (!row) throw new Error(`Competence '${competenceId}' not on resource '${resourceId}'`); + + return { + listId, + resourceId, + competenceId: row.competence_id, + name: row.competence_name ?? undefined, + description: row.competence_description ?? undefined, + externalQualificationNeeded: Boolean(row.external_qualification_needed), + renewTime: row.renew_time ?? undefined, + proficiencyLevel: row.proficiency_level ?? undefined, + qualificationDates: row.qualification_dates ? JSON.parse(row.qualification_dates) : [], + lastUsages: row.last_usages ? JSON.parse(row.last_usages) : [], + }; + } + + /*-------------------------------------------------------------------- + * Embedding Methods + *------------------------------------------------------------------*/ + + /** + * Insert or replace a text embedding for a competence. + * This will overwrite any existing embedding for the same text and type. + * @param embeddingInput object with competenceId, text, type, and embedding vector. + * @throws if the embedding vector does not match the configured dimension. 
+ */ + public upsertEmbedding(embeddingInput: { + listId: string; + resourceId: string; + competenceId: string; + text: string; + type: 'name' | 'description' | 'proficiencyLevel'; + embedding: number[]; + }): void { + const { listId, resourceId, competenceId, text, type, embedding } = embeddingInput; + if (embedding.length !== this.embeddingDim) { + throw new Error(`Embedding must have length ${this.embeddingDim}`); + } + const cid = this.getCompetenceCidByCompetenceId(listId, resourceId, competenceId); + + const cidInt = `${Math.floor(cid)}`; // This + the cast is a workaround, sqlite-vec or sqlite read the cid as a float even though it is an integer. (Could be the lib or the fact that it is a virtual table, not sure) + + this.db + .prepare( + ` + INSERT OR REPLACE INTO competence_embedding + (cid, text, type, embedding) + VALUES (CAST(? AS INTEGER), ?, ?, vec_f32(?)) + `, + ) + .run(cidInt, text, type, new Float32Array(embedding)); + } + + /** Delete all embeddings for one competence + * @param listId user‐facing ID of the resource list + * @param resourceId user‐facing ID of the resource + * @param competenceId user‐facing ID of the competence + */ + public deleteEmbeddingsForCompetence( + listId: string, + resourceId: string, + competenceId: string, + ): void { + const cid = this.getCompetenceCidByCompetenceId(listId, resourceId, competenceId); + this.db.prepare(`DELETE FROM competence_embedding WHERE cid = ?`).run(cid); + } + + /** + * kNN‐search over embeddings, returning user‐facing competenceIds + distances. + * + * @param embedding the query vector to search for + * @param options optional parameters: + * - k: number of nearest neighbors to return (default: all) + * - filter: optional filter by resourceId and/or listId + * - similarityMetric: 'cosine', 'hamming', or 'euclidean' (default: 'cosine') + * @returns an array of objects with competenceId, text, type, and distance. + * @throws if the embedding length does not match the configured dimension. 
+ * @throws if the metric is unsupported or k is not a positive integer. + */ + public searchEmbedding( + embedding: number[], + options?: { + k?: number; + filter?: { resourceId?: string; listId?: string }; + similarityMetric?: 'cosine' | 'hamming' | 'euclidean'; + }, + ): Array<{ + competenceId: string; + resourceId: string; + text: string; + type: string; + distance: number; + }> { + const { k, filter, similarityMetric } = options || {}; + const metrics = { + cosine: 'vec_distance_cosine', + hamming: 'vec_distance_hamming', + euclidean: 'vec_distance_L2', + }; + const metric = metrics[similarityMetric || 'cosine']; + if (!metric) throw new Error(`Unsupported metric: ${similarityMetric}`); + if (embedding.length !== this.embeddingDim) throw new Error(`Embedding length mismatch`); + if (k !== undefined && k <= 0) throw new Error('k must be > 0'); + + let sql = ` + SELECT c.competence_id, r.resource_id, ce.text, ce.type, + ${metric}(ce.embedding, vec_f32(?)) AS distance + FROM competence_embedding ce + JOIN competence c ON ce.cid = c._cid + JOIN resource r ON c.resource_rid = r._rid + `; + const params: any[] = [new Float32Array(embedding)]; + + const whereClauses: string[] = []; + if (filter?.resourceId) { + whereClauses.push(`r.resource_id = ?`); + params.push(filter.resourceId); + } + if (filter?.listId) { + whereClauses.push(`r.list_id = ?`); + params.push(filter.listId); + } + if (whereClauses.length > 0) { + sql += ` WHERE ` + whereClauses.join(' AND '); + } + // sql += ` GROUP BY c.competence_id, ce.type`; + sql += ` ORDER BY distance ASC`; + + if (k) { + sql += ` LIMIT ?`; + params.push(k); + } + + const rows = this.db.prepare(sql).all(...params) as Array; + + let result = rows.map((r) => ({ + competenceId: r.competence_id, + resourceId: r.resource_id, + distance: r.distance, + text: r.text, + type: r.type, + })); + + // Normalise distances to [0, 1], depending on the metric: + if (similarityMetric === 'cosine') { + // Cosine distance is in [0, 2] + result 
= result.map((row) => ({ + ...row, + distance: row.distance / 2, + })); + } else if (similarityMetric === 'hamming') { + // Hamming distance is in [0, 1], so we leave it as is + } else if (similarityMetric === 'euclidean') { + // Euclidean distance is in [0, sqrt(embeddingDim)] + const maxDistance = Math.sqrt(this.embeddingDim); + result = result.map((row) => ({ + ...row, + distance: row.distance / maxDistance, + })); + } + + // Since similariy is now normalised to [0, 1], + // we need to adapt it, as it should be somewhat interpretable as a probability + result = result.map((row) => ({ + ...row, + distance: 1 - row.distance, // Convert distance to similarity + })); + return result; + } +} + +export default VectorDataBase; diff --git a/src/competence-matcher/src/middleware/db-locator.ts b/src/competence-matcher/src/middleware/db-locator.ts new file mode 100644 index 000000000..3b65c0cd6 --- /dev/null +++ b/src/competence-matcher/src/middleware/db-locator.ts @@ -0,0 +1,22 @@ +import { Request, Response, NextFunction } from 'express'; +import { config } from '../config'; + +const { multipleDBs } = config; + +export function dbHeader(req: Request, res: Response, next: NextFunction): void { + // This middleware allows for the use f multiple databases instead of a single one. + // 'x-proceed-db-id' is a custom header that should be included in the request, which specifies the database name to use. + if (multipleDBs) { + const dbName = req.header('x-proceed-db-id'); + if (!dbName || typeof dbName !== 'string' || dbName.trim() === '') { + res.status(400).json({ error: 'Missing x-proceed-db-id header' }); + return; + } + req.dbName = dbName; + } else { + // For now, we use a single database, so we can just set a default name. 
+ req.dbName = 'PROCEED-Matching.db'; + } + + next(); +} From f89ac7563974f783f77921556609c19c3f24f0e8 Mon Sep 17 00:00:00 2001 From: Maxi <76120220+MaxiLein@users.noreply.github.com> Date: Tue, 29 Jul 2025 15:40:00 +0200 Subject: [PATCH 04/48] Add utility files for database management, model handling, and worker operations --- src/competence-matcher/src/utils/db.ts | 7 + .../src/utils/huggingface.ts | 13 ++ src/competence-matcher/src/utils/model.ts | 63 ++++++ src/competence-matcher/src/utils/ollama.ts | 42 ++++ src/competence-matcher/src/utils/prompts.ts | 189 ++++++++++++++++++ src/competence-matcher/src/utils/types.ts | 146 ++++++++++++++ src/competence-matcher/src/utils/worker.ts | 79 ++++++++ 7 files changed, 539 insertions(+) create mode 100644 src/competence-matcher/src/utils/db.ts create mode 100644 src/competence-matcher/src/utils/huggingface.ts create mode 100644 src/competence-matcher/src/utils/model.ts create mode 100644 src/competence-matcher/src/utils/ollama.ts create mode 100644 src/competence-matcher/src/utils/prompts.ts create mode 100644 src/competence-matcher/src/utils/types.ts create mode 100644 src/competence-matcher/src/utils/worker.ts diff --git a/src/competence-matcher/src/utils/db.ts b/src/competence-matcher/src/utils/db.ts new file mode 100644 index 000000000..5d02224b2 --- /dev/null +++ b/src/competence-matcher/src/utils/db.ts @@ -0,0 +1,7 @@ +import DBManager from '../db/db-manager'; + +export function getDB(name: string) { + const dbManager = DBManager.getInstance(); + DBManager.setActiveDB(name); + return dbManager.getDB(name); +} diff --git a/src/competence-matcher/src/utils/huggingface.ts b/src/competence-matcher/src/utils/huggingface.ts new file mode 100644 index 000000000..3d6d5128f --- /dev/null +++ b/src/competence-matcher/src/utils/huggingface.ts @@ -0,0 +1,13 @@ +import Embedding from '../tasks/embedding'; +import ZeroShotSemanticOpposites from '../tasks/semantic-zeroshot'; + +export async function 
ensureAllHuggingfaceModelsAreAvailable() { + try { + Embedding.getInstance(); + ZeroShotSemanticOpposites.getInstance(); + } catch (error) { + throw error; + } + + console.log('All required Hugging Face models are available.'); +} diff --git a/src/competence-matcher/src/utils/model.ts b/src/competence-matcher/src/utils/model.ts new file mode 100644 index 000000000..885c6fce4 --- /dev/null +++ b/src/competence-matcher/src/utils/model.ts @@ -0,0 +1,63 @@ +import { config } from '../config'; + +import { + pipeline, + env as huggingfaceEnv, + PipelineType, + PretrainedModelOptions, +} from '@huggingface/transformers'; +import * as fs from 'node:fs'; +import * as path from 'node:path'; +import { TransformerPipelineOptions } from './types'; + +import { isMainThread } from 'node:worker_threads'; + +/** + * Base class that handles: + * - singleton pipeline instance + * - model cache dir + * - on‑demand loading with optional progressCallback + */ +export abstract class TransformerPipeline { + protected static instance: any = null; + protected static loaded = false; + + protected static getPipelineOptions(): TransformerPipelineOptions { + throw new Error('getPipelineOptions must be implemented in subclasses'); + } + + private static async initEnv(cacheDir?: string) { + if (cacheDir) { + const abs = path.resolve(cacheDir); + if (!fs.existsSync(abs)) fs.mkdirSync(abs, { recursive: true }); + huggingfaceEnv.cacheDir = abs; + } + huggingfaceEnv.allowLocalModels = true; + } + + public static async getInstance(): Promise { + if (this.instance === null) { + const { task, model, options } = this.getPipelineOptions(); + await this.initEnv(options?.cache_dir || config.modelCache); + + const opts: PretrainedModelOptions = { + cache_dir: options?.cache_dir || config.modelCache, + use_external_data_format: options?.use_external_data_format ?? true, + device: options?.device || (config.useGPU ? 
'cuda' : 'cpu'), + dtype: options?.dtype || 'fp32', + progress_callback: options?.progress_callback, + }; + + // actually load the pipeline + this.instance = await pipeline(task as PipelineType, model, opts); + + // mark it as loaded and log on first load + if (!this.loaded && isMainThread) { + console.log(`${model} (${task}) is ready`); + this.loaded = true; + } + } + + return this.instance; + } +} diff --git a/src/competence-matcher/src/utils/ollama.ts b/src/competence-matcher/src/utils/ollama.ts new file mode 100644 index 000000000..cb53af134 --- /dev/null +++ b/src/competence-matcher/src/utils/ollama.ts @@ -0,0 +1,42 @@ +import { Ollama } from 'ollama'; +import { config } from '../config'; + +const { ollamaPath, splittingModel, reasonModel } = config; + +export const ollama = new Ollama({ + host: ollamaPath, + headers: { + 'User-Agent': 'PROCEED Competence Matcher', + }, +}); + +/** + * Ensures that all required models are available by checking their existence + * in the Ollama server. If a model is not available, it will be downloaded. + * If the model cannot be downloaded or is not available, an error will be thrown. 
+ * (Ensures all needed models are actually available) + */ +export async function ensureAllOllamaModelsAreAvailable() { + const models = [splittingModel, reasonModel]; + + const availableModels = (await ollama.list()).models.map((model) => model.model); + + for (const model of models) { + if (!availableModels.includes(model)) { + const modelpull = await ollama.pull({ + model, + insecure: false, + stream: false, + }); + + // Check if the model was successfully pulled + if (!modelpull || modelpull.status !== 'success') { + throw new Error( + `Model ${model} could not be pulled: ${modelpull?.status || 'Unknown error'}`, + ); + } + } + } + + console.log('All required models are available in ollama.'); +} diff --git a/src/competence-matcher/src/utils/prompts.ts b/src/competence-matcher/src/utils/prompts.ts new file mode 100644 index 000000000..023fe5697 --- /dev/null +++ b/src/competence-matcher/src/utils/prompts.ts @@ -0,0 +1,189 @@ +import type { Message } from 'ollama'; +import { config } from '../config'; + +const { splittingSymbol } = config; + +const SEMANTIC_SPLITTER_INTRUCT: Message = { + role: 'system', + content: ` + Your task is to segment the following text (i.e. user input such as plain prose, bullet points or listings) into semantically independent parts. + Do not add, remove, or modify any words - under no circumstances should you ever add any additional text, comments, or explanations. + Preserve the original ordering of words within each group — but groups themselves need not follow the original sequence. + Separate each group only by the delimiter + ${splittingSymbol} + (i.e. exactly as shown, on a line by themselves, no additional whitespaces, just '${splittingSymbol}'). + If the entire input is already one coherent semantic unit, return it verbatim without any delimiter. + Grouping need do not be adjacent - just semantically related (i.e. two related text parts might be separated by other text parts). + If the input is empty, return an empty string. 
+ If the input is a single word, return it verbatim without any delimiter. + If the input is a single sentence, return it verbatim without any delimiter. + If you are unsure about the grouping, return the entire input as a single group without any delimiters. + Under no circumstances should you ever add any additional text, comments, or explanations. + `, +}; +const SEMANTIC_SPLITTER_EXAMPLES: Message[] = [ + { + role: 'user', + content: ` + The job requires welding experience. Tick welding would be preferable if the person is familiar with it. The cage is designed for small pets like rabbits. Therefore, it must not contain any sharp edges that could harm them. Experience with welding small wires could be beneficial. + `, + }, + { + role: 'assistant', + content: ` + The job requires welding experience. Tick welding would be preferable if the person is familiar with it. Experience with welding small wires could be beneficial. + ${splittingSymbol} + The cage is designed for small pets like rabbits. Therefore, it must not contain any sharp edges that could harm them. + `, + }, + { + role: 'user', + content: ` + - Assemble circuit boards according to schematic diagrams + - Test each board for continuity and signal integrity + - Package finished units in protective casing + - Ship completed orders to customers worldwide + `, + }, + { + role: 'assistant', + content: ` + - Assemble circuit boards according to schematic diagrams + - Test each board for continuity and signal integrity + ${splittingSymbol} + - Package finished units in protective casing + - Ship completed orders to customers worldwide + `, + }, + { + role: 'user', + content: ` + 1. Prepare raw materials for production + 2. Record daily output and machine performance. + 3. Clean workstations and restock supplies + 4. Order order new materials when needed. + 5. Calibrate and maintain measurement instruments. + `, + }, + { + role: 'assistant', + content: ` + 1. Prepare raw materials for production + 4. 
Order order new materials when needed. + ${splittingSymbol} + 2. Record daily output and machine performance. + ${splittingSymbol} + 3. Clean workstations and restock supplies + ${splittingSymbol} + 5. Calibrate and maintain measurement instruments. + `, + }, + { + role: 'user', + content: ` + Operate CNC milling machines to produce precision metal parts. Perform quality inspections using calipers, micrometers, and gauges. Monitor machine operation and adjust feed rates, speeds, and tooling as needed. Maintain a clean and safe workspace, following all OSHA safety guidelines. Collaborate with engineers to troubleshoot design issues and implement improvements. Document production logs, inspection reports, and maintenance records daily. Assist in training new operators on standard operating procedures and best practices. + `, + }, + { + role: 'assistant', + content: ` + Operate CNC milling machines to produce precision metal parts. + Monitor machine operation and adjust feed rates, speeds, and tooling as needed. + ${splittingSymbol} + Perform quality inspections using calipers, micrometers, and gauges. + Collaborate with engineers to troubleshoot design issues and implement improvements. + ${splittingSymbol} + Maintain a clean and safe workspace, following all OSHA safety guidelines. + Document production logs, inspection reports, and maintenance records daily. + Assist in training new operators on standard operating procedures and best practices. + `, + }, +]; + +export const SEMANTIC_SPLITTER: Message[] = [ + SEMANTIC_SPLITTER_INTRUCT, + ...SEMANTIC_SPLITTER_EXAMPLES, +]; + +/** + * ------------------------------------------------------------- + */ + +const MATCH_REASON_INTRUCT: Message = { + role: 'system', + content: ` + You are an expert in generating reasons for matching scores between tasks and competences. + Your task is to generate a reason for the matching score between a task and a competence. 
+ The reason should be one to three short, concise sentence that explain why the task and competence match as well as they did or why they did not match that well. + Do not mention the similarity score in your response. + The reason should be based on the text of the task and the competence and their estimated normalized similarity score. + The similarity score is a number between 0 and 1, where 0 means no similarity and 1 means perfect similarity. + Do not mention the similarity score in your response. + `, +}; + +const MATCH_REASON_EXAMPLES: Message[] = [ + { + role: 'user', + content: ` + Task: Operate CNC milling machines to produce precision metal parts. + Competence: Experience with CNC milling machines and precision machining. + Similarity Score: 0.95 + `, + }, + { + role: 'assistant', + content: ` + The statements match very well because the task requires operating CNC milling machines, which is exactly what the competence is about. + `, + }, + { + role: 'user', + content: ` + Task: Assemble circuit boards according to schematic diagrams. + Competence: Basic knowledge of electronics and soldering skills. + Similarity Score: 0.65 + `, + }, + { + role: 'assistant', + content: ` + The the statements have a moderate match because while assembling circuit boards requires some knowledge of electronics, it does not specifically require advanced soldering skills. + `, + }, + { + role: 'user', + content: ` + Task: Prepare raw materials for production. + Competence: Experience with inventory management and supply chain logistics. + Similarity Score: 0.30 + `, + }, + { + role: 'assistant', + content: ` + The statements have a low match because preparing raw materials is a basic task that does not require advanced inventory management or supply chain logistics skills. 
+ `, + }, +]; + +export const MATCH_REASON: Message[] = [MATCH_REASON_INTRUCT, ...MATCH_REASON_EXAMPLES]; + +/** + * ------------------------------------------------------------- + */ +// """""""""" +// The warehouse must maintain ambient temperatures between 15°C and 25°C to protect sensitive goods. Humidity levels should not exceed 60% to prevent corrosion and mold growth. Inventory audits are scheduled weekly to ensure accuracy and compliance with safety standards. +// """""""""" + +// """""""""" +// Our catering service provides vegetarian, vegan, and gluten-free menu options to accommodate diverse dietary needs. All dishes are prepared fresh daily using locally sourced ingredients whenever possible. Orders must be placed at least 48 hours in advance to guarantee availability. Delivery times range from 8 AM to 6 PM on weekdays. +// """""""""" + +// """""""""" +// Employees are required to complete the annual cybersecurity training module before accessing the new intranet portal. The module covers password hygiene, phishing identification, and secure remote-access procedures. Failure to complete training by the deadline will result in temporary revocation of network privileges. +// """""""""" + +// """""""""" +// The production line must operate continuously in three shifts—morning, afternoon, and night—to meet daily target outputs of 5,000 units. All machinery, including conveyor belts and hydraulic presses, requires a thorough safety inspection at the start of each shift to ensure proper lubrication and guard alignment. Operators are responsible for logging any unusual vibrations, noise anomalies, or temperature spikes immediately in the maintenance ledger. Raw material deliveries arrive twice weekly and must be verified against purchase orders for quantity, grade, and certificate of analysis before being released to the staging area. 
Finished goods undergo a final quality check where dimensional tolerances, surface finish, and functional performance are recorded. Any nonconforming parts are quarantined and tagged, then reported to quality assurance for root‐cause analysis. At the end of each month, team leads compile production metrics, downtime reasons, and scrap rates into a summarized report for review at the management safety and efficiency meeting. +// """""""""" diff --git a/src/competence-matcher/src/utils/types.ts b/src/competence-matcher/src/utils/types.ts new file mode 100644 index 000000000..fb51ca786 --- /dev/null +++ b/src/competence-matcher/src/utils/types.ts @@ -0,0 +1,146 @@ +import { PretrainedModelOptions } from '@huggingface/transformers'; + +export type Competence = { + listId: string; // UUIDString + resourceId: string; // UUIDString + competenceId: string; // UUIDString + name?: string; // optional + description?: string; // optional but recommended to have content + externalQualificationNeeded?: boolean; // optional + renewTime?: number; // DaysAsInteger, optional + proficiencyLevel?: string; // optional + qualificationDates?: string[]; // ISO date strings, optional + lastUsages?: string[]; // ISO date strings, optional +}; + +export type CompetenceInput = { + competenceId?: string; + name?: string; + description?: string; + externalQualificationNeeded?: boolean; + renewTime?: number; + proficiencyLevel?: string; + qualificationDates?: string[]; + lastUsages?: string[]; +}; + +export type ResourceInput = { + resourceId?: string; + competencies: CompetenceInput[]; +}; + +export type ResourceListInput = ResourceInput[]; + +export type MatchingTask = { + taskId: string; // UUIDString + name?: string; // optional + description?: string; // optional but recommended to have content + executionInstructions?: string; // optional, e.g. 
HTML + requiredCompetencies?: string[] | CompetenceInput[]; // either array of competenceIds or array of CompetenceInput +}; + +export type Match = { + competenceId: string; + resourceId: string; + text: string; + type: string; + distance: number; + reason?: string; +}; + +export interface VectorDBOptions { + filePath?: string; // If undefined or ":memory:", use in-memory; else path to file - Note: memory will not work with workers!! + embeddingDim: number; +} + +export type CompetenceDBOutput = { + competence_id: string; + competence_name: string | null; + competence_description: string | null; + external_qualification_needed: number; // 0 or 1 + renew_time: number | null; + proficiency_level: string | null; + qualification_dates: string | null; // JSON string + last_usages: string | null; // JSON string +}; + +export type EmbeddingTask = { + listId: string; // UUIDString + resourceId: string; // UUIDString + competenceId: string; // UUIDString + text: string; // Text to embed + type: 'name' | 'description' | 'proficiencyLevel'; // Type of text +}; + +export interface Job { + jobId: string; + dbName: string; +} + +export interface EmbeddingJob extends Job { + tasks: EmbeddingTask[]; +} + +export interface MatchingJob extends Job { + listId?: string; // Which List to match against + resourceId?: string; // Optional: If matching against a single resource + tasks: MatchingTask[]; // Tasks to match +} + +export type ResourceRanking = { + resourceId: string; + taskMatchings: { + taskId: string; // Which of the tasks this matching is referring to + competenceMatchings: { + competenceId: string; + matchings: { + text: string; + type: 'name' | 'description' | 'proficiencyLevel'; + // Sorted DESC by + matchProbability: number; // Normalised inverted distance (, where distance refers to the cosine similarity) + alignment: 'contradicting' | 'neutral' | 'aligning'; // Semantic opposite classification + reason?: string; // Reason for the match + }[]; // Array: Competence-Parts 
matched to task + // Sorted DESC by either: + avgMatchProbability: number; // Average matchProbability of all parts of this competence + avgBestFitMatchProbability: number; // Average of the parts that align well with the task, 0 means there is none + }[]; // Array: Competences matched to task + // Sorted DESC by either: + maxMatchProbability: number; // Best avgMatchingProbability of all competences for this task + maxBestFitMatchProbability: number; // Best avgBestFitMatchProbability of all competences for this task, 0 means there is none + }[]; // Array: Matching of the resource to each task, respectively + // Sorted DESC first by [not contradicting , contradicting] then by either: + avgTaskMatchProbability: number; // Average maxMatchProbability of all tasks for this resource + avgBestFitTaskMatchProbability: number; // Average maxBestFitMatchProbability of all tasks for this resource, 0 means there is none + + contradicting: boolean; // Whether there is a part in a competence of this resource that contradicts the task +}[]; + +export type TaskOverview = { + taskId: string; // UUIDString + taskText: string; // Text of the task +}[]; + +export type GroupedMatchResults = { + tasks: TaskOverview; + resourceRanking: ResourceRanking; +}; + +export type workerTypes = 'embedder' | 'matcher'; + +export interface WorkerQueue { + job: any; + workerScript: workerTypes; + options?: { + onOnline?: (job: any) => void; + onExit?: (job: any, code: number) => void; + onError?: (job: any, error: Error) => void; + onMessage?: (job: any, message: any) => void; + }; +} + +export interface TransformerPipelineOptions { + task: string; + model: string; + options?: PretrainedModelOptions; +} diff --git a/src/competence-matcher/src/utils/worker.ts b/src/competence-matcher/src/utils/worker.ts new file mode 100644 index 000000000..d7b051825 --- /dev/null +++ b/src/competence-matcher/src/utils/worker.ts @@ -0,0 +1,79 @@ +import * as fs from 'node:fs'; +import * as path from 'node:path'; 
+import { Worker, parentPort } from 'worker_threads'; +import VectorDataBase from '../db/db'; +import { getDB } from './db'; +import { config } from '../config'; + +const { maxJobTime } = config; + +export function createWorker(filename: string): Worker { + const tsPath = path.resolve(__dirname, `../worker/${filename}.ts`); + const jsPath = path.resolve(__dirname, `../worker/${filename}.js`); + const isTs = fs.existsSync(tsPath); + + const workerFile = isTs ? tsPath : jsPath; + + const execArgv = isTs + ? [...process.execArgv, '-r', 'ts-node/register/transpile-only'] + : process.execArgv; + + const worker = new Worker(workerFile, { execArgv }); + + return worker; +} + +export async function withJobUpdates( + job: { jobId: string; dbName: string }, + cb: (db: VectorDataBase, payload: T) => Promise, + options?: { + onStart?: () => void; + onDone?: () => void; + onError?: (error: Error) => void; + }, +) { + const db = getDB(job.dbName); + let exitCode = 0; // success by default + let maxTimeCheck = setTimeout(() => { + // if not completed by then, timeout + process.exit(2); + }, maxJobTime); + try { + if (options && options.onStart) { + options.onStart(); + } else { + db.updateJobStatus(job.jobId, 'running'); + parentPort!.postMessage({ type: 'status', jobId: job.jobId, status: 'running' }); + } + + await cb(db, job as any as T); + + if (options && options.onDone) { + options.onDone(); + } else { + db.updateJobStatus(job.jobId, 'completed'); + parentPort!.postMessage({ type: 'status', jobId: job.jobId, status: 'completed' }); + } + } catch (err) { + if (options && options.onError) { + options.onError(err as Error); + } else { + exitCode = 1; // indicate failure + parentPort!.postMessage({ + type: 'error', + jobId: job.jobId, + error: err instanceof Error ? 
err.message : String(err), + }); + db.updateJobStatus(job.jobId, 'failed'); + } + } finally { + clearTimeout(maxTimeCheck); + db.close(); + parentPort!.close(); + process.exit(exitCode); + } +} + +export function log(...args: any[]) { + parentPort?.postMessage({ type: 'log', message: args.map(String).join(' ') }); +} From 8ece5f6e42030729f15ddf7c76ca3c51d0835626 Mon Sep 17 00:00:00 2001 From: Maxi <76120220+MaxiLein@users.noreply.github.com> Date: Tue, 29 Jul 2025 15:40:57 +0200 Subject: [PATCH 05/48] Add worker scripts for embedding and matching tasks with reasoning support --- src/competence-matcher/src/worker/embedder.ts | 25 ++++ src/competence-matcher/src/worker/matcher.ts | 130 ++++++++++++++++ src/competence-matcher/src/worker/test.ts | 9 ++ .../src/worker/worker-manager.ts | 141 ++++++++++++++++++ 4 files changed, 305 insertions(+) create mode 100644 src/competence-matcher/src/worker/embedder.ts create mode 100644 src/competence-matcher/src/worker/matcher.ts create mode 100644 src/competence-matcher/src/worker/test.ts create mode 100644 src/competence-matcher/src/worker/worker-manager.ts diff --git a/src/competence-matcher/src/worker/embedder.ts b/src/competence-matcher/src/worker/embedder.ts new file mode 100644 index 000000000..e51f65949 --- /dev/null +++ b/src/competence-matcher/src/worker/embedder.ts @@ -0,0 +1,25 @@ +import { parentPort } from 'worker_threads'; +import Embedding from '../tasks/embedding'; +import { splitSemantically } from '../tasks/semantic-split'; +import { withJobUpdates } from '../utils/worker'; +import { config } from '../config'; +import { EmbeddingJob } from '../utils/types'; + +parentPort!.once('message', async (job: EmbeddingJob) => { + (global as any).CURRENT_JOB = job.jobId; + + await withJobUpdates(job, async (db, { tasks, jobId }) => { + let work = tasks; + // TODO: This appears to cause the worker to crash silently + // Split tasks semantically + // work = await splitSemantically(tasks); + + // For each task: embed & 
upsert + for (const { listId, resourceId, competenceId, text, type } of work) { + const [vector] = await Embedding.embed(text); + // console.log(`Embedded text for job ${jobId}:`, text, '->', vector); + + db.upsertEmbedding({ listId, resourceId, competenceId, text, type, embedding: vector }); + } + }); +}); diff --git a/src/competence-matcher/src/worker/matcher.ts b/src/competence-matcher/src/worker/matcher.ts new file mode 100644 index 000000000..7e2fe7a93 --- /dev/null +++ b/src/competence-matcher/src/worker/matcher.ts @@ -0,0 +1,130 @@ +import { parentPort } from 'worker_threads'; +import Embedding from '../tasks/embedding'; +import { withJobUpdates } from '../utils/worker'; +import { addReason } from '../tasks/reason'; +import { Match, MatchingJob } from '../utils/types'; +import ZeroShot from '../tasks/semantic-zeroshot'; + +parentPort!.once('message', async (job: MatchingJob) => { + // For workaround: + const matchResults: { [description: string]: any[] } = {}; + for (const task of job.tasks) { + const { taskId, name, description, executionInstructions, requiredCompetencies } = task; + if (!description) { + continue; // Skip tasks without description + } + // Add task description to match results + matchResults[description] = []; + } + + await withJobUpdates( + job, + async (db, { jobId, tasks, listId: listIdFilter, resourceId: resourceIdFilter }) => { + for (const task of tasks) { + const { taskId, name, description, executionInstructions, requiredCompetencies } = task; + if (!description) { + continue; // Skip tasks without description + } + // Embed the task description + const [vector] = await Embedding.embed(description); + + // Search for matches in the competence list (and resource if provided) + let matches: Match[] = db.searchEmbedding(vector, { + filter: { + listId: listIdFilter, + resourceId: resourceIdFilter, // Optional: If matching against a single resource + }, + }); + + // TODO: This appears to cause the worker to not start at all + // Invert 
potentially contrastive matches + // Add reasoning for matching score + // matches = await addReason(matches, description); + + for (const match of matches) { + // Check for semantic opposites + const zeroshotText = `Task description: ${description}\nSkill/Capability description: ${match.text}`; + // From unsuitable to suitable + const contraLabels = ['contradicting', 'aligning']; + const contraHypothesis = 'Task description and Skill/Capability descriptions are {}.'; + const scalingLabls = ['perfect', 'mediocre']; + const scalingHypothesis = + 'Task description and Skill/Capability descriptions are a {} match.'; + const labelScalar = [ + 0.25, // Contradicting matches should be penalised + 0.5, // Scale it down a bit to avoid too high scores for irrelevant matches + 1, // keep the best matches as is + ]; + const contraClassification = await ZeroShot.classify( + zeroshotText, + contraLabels, + contraHypothesis, + ); + let flag: 'contradicting' | 'neutral' | 'aligning' = 'neutral'; + // console.log(contraClassification); + + // @ts-ignore + if (contraClassification.labels[0] === contraLabels[0]) { + // Invert the match distance (since it's normalised to [0,1]: 1 - distance) + match.distance = (1 - match.distance) * labelScalar[0]; + flag = 'contradicting'; + } else { + const scalingClassification = await ZeroShot.classify( + zeroshotText, + scalingLabls, + scalingHypothesis, + ); + + // console.log(scalingClassification); + if ( + // @ts-ignore + scalingClassification.labels[0] === scalingLabls[0] && + // @ts-ignore + scalingClassification.scores[0] > 0.65 + ) { + // Keep the match as is + match.distance *= labelScalar[2]; + flag = 'aligning'; + } + // @ts-ignore + else if (scalingClassification.labels[0] === scalingLabls[1]) { + // Scale it down a bit + match.distance *= labelScalar[1]; + flag = 'neutral'; + } + } + + // db.addMatchResult({ + // jobId, + // taskId, + // competenceId: match.competenceId, + // text: match.text, + // type: match.type as 'name' | 
'description' | 'proficiencyLevel', + // distance: match.distance, + // reason: match.reason, + // }); + // } + + // Workaround to avoid the worker crashing silently + matchResults[description].push({ + jobId, + taskId, + taskText: description, + competenceId: match.competenceId, + resourceId: match.resourceId, + text: match.text, + type: match.type as 'name' | 'description' | 'proficiencyLevel', + alignment: flag, + distance: match.distance, + reason: match.reason, + }); + } + } + }, + { + onDone: () => { + parentPort!.postMessage({ type: 'job', job: 'reason', workload: matchResults }); + }, + }, + ); +}); diff --git a/src/competence-matcher/src/worker/test.ts b/src/competence-matcher/src/worker/test.ts new file mode 100644 index 000000000..fde1d468a --- /dev/null +++ b/src/competence-matcher/src/worker/test.ts @@ -0,0 +1,9 @@ +import { parentPort } from 'worker_threads'; +import { splitSemantically } from '../tasks/semantic-split'; +import { EmbeddingJob } from '../utils/types'; + +parentPort!.once('message', async (job: EmbeddingJob) => { + const { tasks, jobId } = job; + parentPort!.postMessage({ type: 'status', jobId, status: 'running' }); + parentPort!.postMessage(await splitSemantically(tasks)); +}); diff --git a/src/competence-matcher/src/worker/worker-manager.ts b/src/competence-matcher/src/worker/worker-manager.ts new file mode 100644 index 000000000..0bb7c1a00 --- /dev/null +++ b/src/competence-matcher/src/worker/worker-manager.ts @@ -0,0 +1,141 @@ +import { Worker } from 'worker_threads'; +import { config } from '../config'; +import { createWorker } from '../utils/worker'; +import { splitSemantically } from '../tasks/semantic-split'; +import { Match, WorkerQueue, workerTypes } from '../utils/types'; +import { addReason } from '../tasks/reason'; +import { getDB } from '../utils/db'; + +class WorkerManager { + private concurrency: number; + private queue: WorkerQueue[] = []; + private active: Set = new Set(); + + constructor(concurrency: number) { + 
this.concurrency = concurrency; + } + + /** + * Enqueue a job for the named worker script + */ + public enqueue(job: any, workerScript: workerTypes, options: WorkerQueue['options'] = {}) { + this.queue.push({ job, workerScript, options }); + this.dispatch(); + } + + /** Try to start as many queued jobs as we have free threads */ + private dispatch() { + while (this.active.size < this.concurrency && this.queue.length > 0) { + const { job, workerScript, options } = this.queue.shift()!; + this.startWorker(job, workerScript, options); + } + } + + /** Spawn one worker, hook up its lifecycle, and send the job */ + private startWorker(job: any, workerScript: workerTypes, options: WorkerQueue['options']) { + const worker = createWorker(workerScript); + + this.active.add(worker); + + worker.once('online', () => { + // console.log(`[WorkerManager] Worker for ${workerScript} started`); + worker.postMessage(job); + + options?.onOnline?.(job); + }); + + // When the worker exits (success or failure), remove from active set & dispatch next + worker.once('exit', (code) => { + this.active.delete(worker); + if (code === 1) { + console.error(`[WorkerManager] ${workerScript} exited (failed) with code`, code); + } else if (code === 0) { + // console.log(`[WorkerManager] ${workerScript} exited successfully`); + } else if (code === 2) { + console.error(`[WorkerManager] ${workerScript} timed out`); + } + this.dispatch(); + + options?.onExit?.(job, code); + }); + + worker.once('error', (err) => { + console.error(`[WorkerManager] ${workerScript} error:`, err); + + options?.onError?.(job, err); + }); + + worker.on('message', async (message) => { + switch (message.type) { + case 'status': + console.log(`[WorkerManager] Worker for job ${message.jobId} status:`, message.status); + break; + case 'error': + console.error(`[WorkerManager] Worker for job ${message.jobId} error:`, message.error); + break; + case 'log': + console.log(`[WorkerManager] Worker for job ${message.jobId} log:`, 
message.message); + break; + + // Workaround for adding reasoning before saving in DB + case 'job': + switch (message.job) { + case 'reason': + await handleReasoning(job, message); + break; + } + break; + } + options?.onMessage?.(job, message); + }); + } +} + +async function handleReasoning(job: any, message: any) { + const finalMatches = []; + // Add reasoning before saving in DB + for (const [task, matches] of Object.entries(message.workload)) { + const taskMatches = await addReason< + Match & { + taskId: string; + taskText: string; + type: 'name' | 'description' | 'proficiencyLevel'; + alignment: 'contradicting' | 'neutral' | 'aligning'; + } + >( + matches as (Match & { + taskId: string; + taskText: string; + type: 'name' | 'description' | 'proficiencyLevel'; + alignment: 'contradicting' | 'neutral' | 'aligning'; + })[], + task, + ); + finalMatches.push(...taskMatches); + } + + // Save in DB + const db = getDB(job.dbName); + + for (const match of finalMatches) { + db.addMatchResult({ + jobId: job.jobId, + taskId: match.taskId, + taskText: match.taskText, + competenceId: match.competenceId, + resourceId: match.resourceId, + distance: match.distance, + text: match.text, + type: match.type, + reason: match.reason, + alignment: match.alignment, + }); + } + + // Update job status + db.updateJobStatus(job.jobId, 'completed'); +} + +// export a singleton instance +const manager = new WorkerManager(config.maxWorkerThreads); +export default manager; From 4d58b1bef124876303583a4c5ec1349afa581be2 Mon Sep 17 00:00:00 2001 From: Maxi <76120220+MaxiLein@users.noreply.github.com> Date: Tue, 29 Jul 2025 15:41:12 +0200 Subject: [PATCH 06/48] Add middleware for logging and resource management in competence-matcher --- .../src/middleware/logging.ts | 17 + .../src/middleware/match.ts | 406 ++++++++++++++++++ .../src/middleware/resource.ts | 213 +++++++++ 3 files changed, 636 insertions(+) create mode 100644 src/competence-matcher/src/middleware/logging.ts create mode 100644 
src/competence-matcher/src/middleware/match.ts create mode 100644 src/competence-matcher/src/middleware/resource.ts diff --git a/src/competence-matcher/src/middleware/logging.ts b/src/competence-matcher/src/middleware/logging.ts new file mode 100644 index 000000000..af93af303 --- /dev/null +++ b/src/competence-matcher/src/middleware/logging.ts @@ -0,0 +1,17 @@ +import { Request, Response, NextFunction } from 'express'; + +export function requestLogger(req: Request, res: Response, next: NextFunction): void { + const { method, query, body, headers, params } = req; + const logData = { + time: new Date().toISOString(), + method, + path: req.path, + query: JSON.stringify(query, null, 2), + body, + headers: JSON.stringify(headers, null, 2), + params: JSON.stringify(params, null, 2), + ip: req.ip, + }; + console.table([logData]); + next(); +} diff --git a/src/competence-matcher/src/middleware/match.ts b/src/competence-matcher/src/middleware/match.ts new file mode 100644 index 000000000..86c44bb56 --- /dev/null +++ b/src/competence-matcher/src/middleware/match.ts @@ -0,0 +1,406 @@ +import { Request, Response, NextFunction } from 'express'; +import { PATHS } from '../server'; +import { getDB } from '../utils/db'; +import workerManager from '../worker/worker-manager'; +import { + CompetenceInput, + GroupedMatchResults, + MatchingJob, + MatchingTask, + ResourceListInput, + ResourceRanking, + TaskOverview, +} from '../utils/types'; +import { handleCreateResourceList } from './resource'; + +export function matchCompetenceList(req: Request, res: Response, next: NextFunction): void { + try { + let listId: string; + let list: ResourceListInput; + let taskInput: MatchingTask[]; + const db = getDB(req.dbName!); + + /**-------------------------------------------- + * Checks + *---------------------------------------------*/ + if ('competenceList' in req.body) { + // Handle case where competenceList is provided + const { competenceList, tasks } = req.body as { + competenceList: 
ResourceListInput; + tasks: MatchingTask[]; + }; + list = competenceList; + taskInput = tasks; + } else if ('competenceListId' in req.body) { + // Handle case where competenceListId is provided + const { competenceListId, tasks } = req.body as { + competenceListId: string; + tasks: MatchingTask[]; + }; + listId = competenceListId; + taskInput = tasks; + } + + if (!listId! && !list!) { + res.status(400).json({ + error: 'Either competenceListId or competenceList must be provided.', + }); + return; + } + + if (!taskInput! || !Array.isArray(taskInput) || taskInput?.length === 0) { + res.status(400).json({ + error: 'An array of tasks must be provided for matching.', + }); + return; + } + + if (listId! && !(typeof listId === 'string')) { + res.status(400).json({ + error: 'competenceListId must be an UUIDStrings.', + }); + return; + } else if ( + list! && + (!Array.isArray(list) || + !list.every( + (entry) => typeof entry === 'object' && !Array.isArray(entry) && entry !== null, + )) + ) { + res.status(400).json({ + error: 'competenceList must be an array of ResourceInput objects.', + }); + return; + } + + /**-------------------------------------------- + * Case existing competenceListId was passed + *---------------------------------------------*/ + if (listId!) { + // Check if the competence list exists + const competenceLists = db.getAvailableResourceLists(); + if (!competenceLists.includes(listId)) { + res.status(404).json({ + error: `Competence list with ID ${listId} not found.`, + }); + return; + } + + const jobId = db.createJob(listId); + const job: MatchingJob = { + jobId, + dbName: req.dbName!, + listId, + resourceId: undefined, // For now, we don't support matching against a single resource + tasks: taskInput.map((task) => { + return { + taskId: task.taskId, + name: task.name, + description: task.description, + executionInstructions: task.executionInstructions, + requiredCompetencies: (task.requiredCompetencies ?? 
[]).map((competence) => + typeof competence === 'string' + ? (competence as string) + : ({ + competenceId: competence.competenceId, + name: competence.name, + description: competence.description, + externalQualificationNeeded: competence.externalQualificationNeeded, + renewTime: competence.renewTime, + proficiencyLevel: competence.proficiencyLevel, + qualificationDates: competence.qualificationDates, + lastUsages: competence.lastUsages, + } as CompetenceInput), + ) as string[] | CompetenceInput[], + }; + }), + }; + + workerManager.enqueue(job, 'matcher'); + + // Respond with jobId in location header + res + .setHeader('Location', `${PATHS.match}/jobs/${jobId}`) + // Accepted response + .status(202) + .json({ jobId, status: 'pending' }); + return; + } + + /**-------------------------------------------- + * Case new Competence-List was passed + *---------------------------------------------*/ + // Create a new competence list + const matchingJobId = db.createJob(); + if (list!) { + db.updateJobStatus(matchingJobId, 'preprocessing'); + handleCreateResourceList(req.dbName!, list, (job, code, jobId) => { + try { + // Embedding fails -> no matching possible (i.e. fail the matching job) + if (code !== 0) { + db.updateJobStatus(matchingJobId, 'failed'); + return; + } + db.updateJobStatus(matchingJobId, 'pending'); + + // Retrieve the competence list ID + const { referenceId: listId } = db.getJob(jobId); + // Create the matching job + const matchingJob: MatchingJob = { + jobId: matchingJobId, + dbName: req.dbName!, + listId, + resourceId: undefined, // For now, we don't support matching against a single resource + tasks: taskInput.map((task) => { + return { + taskId: task.taskId, + name: task.name, + description: task.description, + executionInstructions: task.executionInstructions, + requiredCompetencies: (task.requiredCompetencies ?? []).map((competence) => + typeof competence === 'string' + ? 
(competence as string) + : ({ + competenceId: competence.competenceId, + name: competence.name, + description: competence.description, + externalQualificationNeeded: competence.externalQualificationNeeded, + renewTime: competence.renewTime, + proficiencyLevel: competence.proficiencyLevel, + qualificationDates: competence.qualificationDates, + lastUsages: competence.lastUsages, + } as CompetenceInput), + ) as string[] | CompetenceInput[], + }; + }), + }; + // Enqueue the matching job + workerManager.enqueue(matchingJob, 'matcher'); + } catch (error) { + db.updateJobStatus(matchingJobId, 'failed'); + console.error('Error creating (inline) matching job:', error); + } + }); + + res + .setHeader('Location', `${PATHS.match}/jobs/${matchingJobId}`) + .status(202) + .json({ jobId: matchingJobId, status: 'pending' }); + } + } catch (error) { + console.error('Error matching:', error); + res.status(500).json({ error: 'Internal Server Error' }); + } +} + +export function getMatchJobResults(req: Request, res: Response, next: NextFunction): void { + // Get jobId from path + const { jobId } = req.params; + // Get sorter from query params + const requestedSorter = req.query.rankBy as string | undefined; + const sorter = requestedSorter == 'bestFit' ? 
'bestFit' : 'avgFit'; // Default to avgFit + const db = getDB(req.dbName!); + + // Check if job exists + const job = db.getJob(jobId); + if (!job) { + res.status(404).json({ error: `Job with ID ${jobId} not found.` }); + return; + } + + // Job can be pending, preprocessing, running, completed, or failed + switch (job.status) { + case 'pending': + case 'running': + case 'preprocessing': + res.status(202).json({ + jobId, + status: job.status, + }); + return; + case 'failed': + res.status(500).json({ + error: `Job with ID ${jobId} failed.`, + }); + return; + case 'completed': + // Proceed to return results below + break; + default: + console.error(`Unexpected job status: ${job.status} for jobId: ${jobId}`); + res.status(500).json({ + error: `Job with ID ${jobId} failed.`, + }); + return; + } + + // Return match results + const results = db.getMatchResults(jobId); + + const tasks: TaskOverview = results.reduce((acc, result) => { + const { taskId, taskText } = result; + // Check if task already exists in the overview + if (!acc.some((task) => task.taskId === taskId)) { + acc.push({ taskId, taskText }); + } + return acc; + }, [] as TaskOverview); + + // Get the structure of the results + let groupedResults: ResourceRanking = results.reduce((acc, result) => { + const { taskId, competenceId, resourceId, distance, text, type, alignment, reason } = result; + + // resourceId + let resourceGroup = acc.find((group) => group.resourceId === resourceId); + if (!resourceGroup) { + resourceGroup = { + resourceId, + taskMatchings: [], + avgTaskMatchProbability: 0, + avgBestFitTaskMatchProbability: 0, + contradicting: false, + }; + acc.push(resourceGroup); + } + // taskMatchings + let taskMatches = resourceGroup.taskMatchings.find((task) => task.taskId === taskId); + if (!taskMatches) { + taskMatches = { + taskId, + competenceMatchings: [], + maxMatchProbability: 0, + maxBestFitMatchProbability: 0, + }; + resourceGroup.taskMatchings.push(taskMatches); + } + + // competenceMatchings + 
let competenceMatches = taskMatches.competenceMatchings.find( + (competence) => competence.competenceId === competenceId, + ); + if (!competenceMatches) { + competenceMatches = { + competenceId, + matchings: [], + avgMatchProbability: 0, + avgBestFitMatchProbability: 0, + }; + taskMatches.competenceMatchings.push(competenceMatches); + } + + // Add the match to competenceMatches + competenceMatches.matchings.push({ + text, + type: type as 'name' | 'description' | 'proficiencyLevel', + matchProbability: distance, + alignment: alignment as 'contradicting' | 'neutral' | 'aligning', + reason: reason || undefined, + }); + + return acc; + }, [] as ResourceRanking); + + // Aggregate and sort + groupedResults = groupedResults + .map((resourceGroup) => { + const { resourceId, taskMatchings, avgTaskMatchProbability, avgBestFitTaskMatchProbability } = + resourceGroup; + + const newTaskMatchings = taskMatchings.map((taskGroup) => { + const { taskId, competenceMatchings, maxMatchProbability, maxBestFitMatchProbability } = + taskGroup; + + const newCompetenceMatchings = competenceMatchings.map((competenceGroup) => { + const { competenceId, matchings, avgMatchProbability, avgBestFitMatchProbability } = + competenceGroup; + + // Calculate average match probability for this competence (i.e. avg over all parts of this competence) + const totalMatchProbability = matchings.reduce( + (sum, match) => sum + match.matchProbability, + 0, + ); + + let numberOfBestFits = 0; + const totalBestFitMatchProbability = matchings.reduce((sum, match) => { + if (match.alignment === 'aligning') { + numberOfBestFits++; + return sum + match.matchProbability; + } + return sum; + }, 0); + + // Return sorted + return { + competenceId, + matchings: matchings.sort((a, b) => b.matchProbability - a.matchProbability), + avgMatchProbability: totalMatchProbability / matchings.length, + avgBestFitMatchProbability: + numberOfBestFits > 0 ? 
totalBestFitMatchProbability / numberOfBestFits : 0, // If no best fit, set to 0 + }; + }); + + // Return sorted + return { + taskId, + competenceMatchings: newCompetenceMatchings.sort((a, b) => { + const key = sorter === 'bestFit' ? 'avgBestFitMatchProbability' : 'avgMatchProbability'; + return b[key] - a[key]; + }), + maxMatchProbability: Math.max( + ...newCompetenceMatchings.map((c) => c.avgMatchProbability), + ), + maxBestFitMatchProbability: Math.max( + ...newCompetenceMatchings.map((c) => c.avgBestFitMatchProbability), + ), + }; + }); + + // Calculate average task match probability for this resource + const totalTaskMatchProbability = newTaskMatchings.reduce( + (sum, task) => sum + task.maxMatchProbability, + 0, + ); + const totalBestFitTaskMatchProbability = newTaskMatchings.reduce( + (sum, task) => sum + task.maxBestFitMatchProbability, + 0, + ); + + // Return sorted + return { + resourceId, + taskMatchings: newTaskMatchings.sort((a, b) => { + const key = sorter === 'bestFit' ? 'maxBestFitMatchProbability' : 'maxMatchProbability'; + return b[key] - a[key]; + }), + avgTaskMatchProbability: totalTaskMatchProbability / newTaskMatchings.length, + avgBestFitTaskMatchProbability: + totalBestFitTaskMatchProbability / newTaskMatchings.length || 0, // If no best fit, set to 0 + contradicting: newTaskMatchings.some((task) => + task.competenceMatchings.some((competence) => + competence.matchings.some((match) => match.alignment === 'contradicting'), + ), + ), + }; + }) + .sort((a, b) => { + const key = + sorter === 'bestFit' ? 'avgBestFitTaskMatchProbability' : 'avgTaskMatchProbability'; + + // Sort in two levels: Contradicting, key + // First not contradicting resources, then contradicting ones + // Case one is contradicting, the other is not + if (a.contradicting !== b.contradicting) { + return a.contradicting ? 
1 : -1; // Non-contradicting first + } + // Both are contradicting or both are not + // Sort by the key + return b[key] - a[key]; + }); + + const load: GroupedMatchResults = { + tasks, + resourceRanking: groupedResults, + }; + + res.status(200).json(load); +} diff --git a/src/competence-matcher/src/middleware/resource.ts b/src/competence-matcher/src/middleware/resource.ts new file mode 100644 index 000000000..cf5d9848e --- /dev/null +++ b/src/competence-matcher/src/middleware/resource.ts @@ -0,0 +1,213 @@ +import { Request, Response, NextFunction } from 'express'; +import { PATHS } from '../server'; +import { getDB } from '../utils/db'; +import workerManager from '../worker/worker-manager'; +import { splitSemantically } from '../tasks/semantic-split'; +import { CompetenceInput, EmbeddingJob, EmbeddingTask, ResourceInput } from '../utils/types'; + +export function getResourceLists(req: Request, res: Response, next: NextFunction): void { + try { + const db = getDB(req.dbName!); + + const availableResourceLists = db.getAvailableResourceLists(); + + res.status(200).json(availableResourceLists); // string[] + } catch (error) { + console.error('Error retrieving resource lists:', error); + res.status(500).json({ error: 'Internal Server Error' }); + } +} + +export function getResourceList(req: Request, res: Response, next: NextFunction): void { + try { + const db = getDB(req.dbName!); + const resourceListId = req.params.resourceListId; + + if (!resourceListId) { + res.status(400).json({ error: 'Resource list ID is required' }); + return; + } + + let resourceList; + try { + resourceList = db.getResourceList(resourceListId); + } catch (error) { + res.status(404).json({ error: 'Resource list not found' }); + return; + } + + res.status(200).json(resourceList); + // type: + // resourceList: { + // competenceListId: string; + // resources: Array<{ + // resourceId: string; + // competencies: Array<{ + // competenceId: string; + // name?: string; + // description?: string; + // 
externalQualificationNeeded: boolean; + // renewTime?: number; + // proficiencyLevel?: string; + // qualificationDates: string[]; + // lastUsages: string[]; + // }>; + // }>; + // } + } catch (error) { + console.error('Error retrieving resource list:', error); + res.status(500).json({ error: 'Internal Server Error' }); + } +} + +// Helper function to handle the creation logic +export async function handleCreateResourceList( + dbName: string, + resources: ResourceInput[], + onWorkerExit?: (job: any, code: number, jobId: string) => void, +): Promise<{ jobId: string; status: string }> { + let resourceIds: string[] = []; + let competences: CompetenceInput[][] = []; + + // Validate and extract data + resources.forEach(({ resourceId, competencies }: ResourceInput) => { + if (!resourceId || typeof resourceId !== 'string') { + throw new Error('Invalid resourceId in request body'); + } + if (!Array.isArray(competencies)) { + throw new Error('Invalid competencies in request body'); + } + resourceIds.push(resourceId); + const checkedCompetences = competencies.map((c: CompetenceInput) => { + if (!c.competenceId || typeof c.competenceId !== 'string') { + throw new Error('Invalid competenceId in request body'); + } + return { + competenceId: c.competenceId, + name: c.name, + description: c.description, + externalQualificationNeeded: c.externalQualificationNeeded, + renewTime: c.renewTime, + proficiencyLevel: c.proficiencyLevel, + qualificationDates: c.qualificationDates, + lastUsages: c.lastUsages, + }; + }); + competences.push(checkedCompetences); + }); + + // Create a new resource list in the database + let listId: string; + let jobId: string; + const db = getDB(dbName); + db.atomicStep(() => { + listId = db.createResourceList(); + resourceIds.forEach((resourceId) => { + db.addResource(listId, resourceId); + }); + competences.forEach((competenceArray, resourceIndex) => { + competenceArray.forEach((competence) => { + db.addCompetence(listId, resourceIds[resourceIndex], 
competence); + }); + }); + jobId = db.createJob(listId); + }); + + // Prepare embedding tasks + const descriptionEmbeddingInput = competences + .map((competenceArray, resourceIndex) => { + return competenceArray.map((competence) => { + return { + listId: listId!, + resourceId: resourceIds[resourceIndex], + competenceId: competence.competenceId, + text: competence.description, + type: 'description', + }; + }); + }) + .flat() as EmbeddingTask[]; + + // Workaround for now + // Ideally, the worker should handle the splitting as well + db.updateJobStatus(jobId!, 'preprocessing'); + let job: EmbeddingJob | undefined; + + splitSemantically(descriptionEmbeddingInput) + .then((tasks) => { + job = { + jobId: jobId!, + dbName: dbName, + tasks, + }; + }) + .catch((err) => { + console.error('Error splitting semantically:', err); + job = { + jobId: jobId!, + dbName: dbName, + tasks: descriptionEmbeddingInput, + }; + }) + .finally(() => { + db.updateJobStatus(jobId!, 'pending'); + workerManager.enqueue(job!, 'embedder', { + onExit: (job, code) => onWorkerExit?.(job, code, jobId!), + }); + }); + + return { jobId: jobId!, status: 'pending' }; +} + +export function createResourceList(req: Request, res: Response, next: NextFunction): void { + if (!Array.isArray(req.body) || req.body.length === 0) { + res.status(400).json({ error: 'Invalid request body. Expected an array of resources.' 
}); + return; + } + try { + handleCreateResourceList(req.dbName!, req.body) + .then(({ jobId, status }) => { + res + .setHeader('Location', `${PATHS.resource}/jobs/${jobId}`) + .status(202) + .json({ jobId, status }); + }) + .catch((error) => { + console.error('Error adding resource list:', error); + res.status(400).json({ error: error.message || 'Invalid request body format' }); + }); + } catch (error) { + console.error('Error processing request body:', error); + res.status(400).json({ error: 'Invalid request body format' }); + } +} + +export function getJobStatus(req: Request, res: Response) { + try { + const db = getDB(req.dbName!); + const job = db.getJob(req.params.jobId); + + switch (job.status) { + case 'pending': + case 'preprocessing': + case 'running': + res.status(202).json({ jobId: job.jobId, status: job.status }); // both strings + return; + case 'completed': + res + .status(201) + .setHeader('Location', `${PATHS.resource}/${job.referenceId}`) + .json({ jobId: job.jobId, status: job.status, competenceListId: job.referenceId }); + return; + case 'failed': + res.status(500).json({ jobId: job.jobId, status: job.status }); //both strings + return; + default: + res.status(500).json({ error: 'Internal Server Error' }); + return; + } + } catch (err) { + // console.error(err); + res.status(404).json({ error: 'Job not found' }); + } +} From e59d005ccec3d26b22897f9bc2ca70c4b0993813 Mon Sep 17 00:00:00 2001 From: Maxi <76120220+MaxiLein@users.noreply.github.com> Date: Tue, 29 Jul 2025 15:41:27 +0200 Subject: [PATCH 07/48] Add routes for resource and match handling in competence-matcher --- src/competence-matcher/src/routes/match.ts | 21 ++++ src/competence-matcher/src/routes/resource.ts | 34 ++++++ src/competence-matcher/src/server.ts | 112 ++++++++++++++++++ 3 files changed, 167 insertions(+) create mode 100644 src/competence-matcher/src/routes/match.ts create mode 100644 src/competence-matcher/src/routes/resource.ts create mode 100644 
src/competence-matcher/src/server.ts diff --git a/src/competence-matcher/src/routes/match.ts b/src/competence-matcher/src/routes/match.ts new file mode 100644 index 000000000..0cb894f73 --- /dev/null +++ b/src/competence-matcher/src/routes/match.ts @@ -0,0 +1,21 @@ +import express from 'express'; +import { getMatchJobResults, matchCompetenceList } from '../middleware/match'; + +const router = express.Router(); + +// .all() +// .get() +// .put() +// .post(); +// .patch(); +// .delete(); + +// '/:resource-list-id' +// '/:resource-list-id/:resource-id' +// '/:resource-list-id/:resource-id/:competence-id' + +router.route('/jobs/').post(matchCompetenceList); + +router.route('/jobs/:jobId').get(getMatchJobResults); + +export default router; diff --git a/src/competence-matcher/src/routes/resource.ts b/src/competence-matcher/src/routes/resource.ts new file mode 100644 index 000000000..2096119ec --- /dev/null +++ b/src/competence-matcher/src/routes/resource.ts @@ -0,0 +1,34 @@ +import express from 'express'; +import { + createResourceList, + getJobStatus, + getResourceList, + getResourceLists, +} from '../middleware/resource'; +import { config } from '../config'; + +const { multipleDBs } = config; + +const router = express.Router(); + +// .all() +// .get() +// .put() +// .post(); +// .patch(); +// .delete(); + +// '/:resource-list-id' +// '/:resource-list-id/:resource-id' +// '/:resource-list-id/:resource-id/:competence-id' + +// Getting an overview of all resource lists should be tenant-specific, +// so it should only be available if multipleDBs is true.
+if (multipleDBs) router.route('/').get(getResourceLists); + +router.route('/jobs').post(createResourceList); +router.route('/jobs/:jobId').get(getJobStatus); + +router.route('/:resourceListId').get(getResourceList); + +export default router; diff --git a/src/competence-matcher/src/server.ts b/src/competence-matcher/src/server.ts new file mode 100644 index 000000000..158a5faac --- /dev/null +++ b/src/competence-matcher/src/server.ts @@ -0,0 +1,112 @@ +import express from 'express'; + +import ResourceRouter from './routes/resource'; +import MatchRouter from './routes/match'; +import { config } from './config'; +import { dbHeader } from './middleware/db-locator'; +import { requestLogger } from './middleware/logging'; +import Embedding from './tasks/embedding'; +import { ensureAllOllamaModelsAreAvailable } from './utils/ollama'; +import { splitSemantically } from './tasks/semantic-split'; +import { createWorker } from './utils/worker'; +import { ensureAllHuggingfaceModelsAreAvailable } from './utils/huggingface'; +import { EmbeddingTask } from './utils/types'; + +const { port: PORT } = config; + +export const PATHS = { + resource: '/resource-competence-list', + match: '/matching-task-to-resource', +}; + +// Extend Express Request interface +declare module 'express-serve-static-core' { + interface Request { + dbName?: string; + } +} + +async function main() { + const app = express(); + + // Ensure all required models are available + // Hugging Face models + await ensureAllHuggingfaceModelsAreAvailable(); + // Ollama models + await ensureAllOllamaModelsAreAvailable(); + + // const tasks = [ + // { + // listId: 'test-list', + // resourceId: 'test-resource', + // competenceId: 'test-competence', + // text: 'This competence covers the principles and best practices of designing scalable software systems. It includes high-level architecture, component interaction, and trade-off analysis. 
Practitioners will need to balance performance, reliability, and maintainability when making design decisions.', + // type: 'description', + // }, + // { + // listId: 'test-list', + // resourceId: 'test-resource', + // competenceId: 'test-competence', + // text: 'This competence focuses on building and maintaining RESTful and GraphQL APIs. It covers endpoint design, versioning strategies, and error handling. Learners will gain hands-on experience with request validation, authentication, and performance tuning.', + // type: 'description', + // }, + // { + // listId: 'test-list', + // resourceId: 'test-resource', + // competenceId: 'test-competence', + // text: 'This competence entails designing effective database schemas to represent business domains. It involves normalization, denormalization, and indexing strategies for optimal query performance. Real-world scenarios will illustrate when to choose relational versus NoSQL approaches.', + // type: 'description', + // }, + // { + // listId: 'test-list', + // resourceId: 'test-resource', + // competenceId: 'test-competence', + // text: 'This competence covers fundamental security principles for web applications. Topics include authentication, authorization, encryption, and secure configuration management. Practical exercises demonstrate common vulnerabilities and how to mitigate them effectively.', + // type: 'description', + // }, + // { + // listId: 'test-list', + // resourceId: 'test-resource', + // competenceId: 'test-competence', + // text: "This person can not swim at all. 
Please don't let them close water at all.", + // type: 'description', + // }, + // ] as EmbeddingTask[]; + + // const testworker = createWorker('test'); + // testworker.on('message', (message) => { + // console.log(message); + // }); + // testworker.postMessage(tasks); + + // const result = await splitSemantically(tasks); + // console.log(result); + + // Parse JSON + app.use(express.json()); + // Parse URL-encoded data + app.use(express.urlencoded({ extended: true })); + // Middleware to handle database header + app.use(dbHeader); + // Logging middleware + // app.use(requestLogger); + + // Hello World + app.get('/', (req, res, next) => { + console.log('Received a GET request on /'); + res.status(200).send('Welcome to the Matching Server'); + }); + + // Routes + app.use(PATHS.resource, ResourceRouter); + app.use(PATHS.match, MatchRouter); + + app.listen(PORT, () => { + console.log(`Matching-Server is running on http://localhost:${PORT}`); + }); +} + +main().catch((error) => { + console.error('Server shutdown due to error:', error); + process.exit(1); +}); From ae5edc1c198805c759553249f501f1fbcf9bfe0d Mon Sep 17 00:00:00 2001 From: Maxi <76120220+MaxiLein@users.noreply.github.com> Date: Tue, 29 Jul 2025 15:41:43 +0200 Subject: [PATCH 08/48] Add embedding, reasoning, semantic splitting, and zero-shot classification tasks --- src/competence-matcher/src/tasks/embedding.ts | 48 ++++++ src/competence-matcher/src/tasks/reason.ts | 43 +++++ .../src/tasks/semantic-split.ts | 155 ++++++++++++++++++ .../src/tasks/semantic-zeroshot.ts | 43 +++++ 4 files changed, 289 insertions(+) create mode 100644 src/competence-matcher/src/tasks/embedding.ts create mode 100644 src/competence-matcher/src/tasks/reason.ts create mode 100644 src/competence-matcher/src/tasks/semantic-split.ts create mode 100644 src/competence-matcher/src/tasks/semantic-zeroshot.ts diff --git a/src/competence-matcher/src/tasks/embedding.ts b/src/competence-matcher/src/tasks/embedding.ts new file mode 100644 index 
000000000..0f320214c --- /dev/null +++ b/src/competence-matcher/src/tasks/embedding.ts @@ -0,0 +1,48 @@ +import { + PipelineType, + FeatureExtractionPipeline, + FeatureExtractionPipelineOptions, +} from '@huggingface/transformers'; +import { config } from '../config'; +import { TransformerPipeline } from '../utils/model'; +import { TransformerPipelineOptions } from '../utils/types'; + +export default class Embedding extends TransformerPipeline { + protected static override getPipelineOptions(): TransformerPipelineOptions { + return { + task: 'feature-extraction' as PipelineType, + model: config.embeddingModel, + options: { + // progress_callback: (progress) => { + // console.log(`Embedding progress: ${progress}`); + // }, + }, + }; + } + + /** + * Turn text (or array of text) into one or more vectors. + * @param texts string or array + * @param opts override default mean-pooling/normalize + */ + public static async embed( + texts: string | string[], + opts: FeatureExtractionPipelineOptions = { pooling: 'mean', normalize: true }, + ): Promise { + // Pipeline is loaded & cached by TransformerPipeline + const pipe = await this.getInstance(); + const input = Array.isArray(texts) ? texts : [texts]; + // call the pipeline + const raw = await pipe(input, opts); + const arrs = (Array.isArray(raw) ? 
raw : [raw]).map((tensor) => { + // each tensor.data is a Float32Array + const data = (tensor as any).data as Float32Array; + const vec = Array.from(data); + if (vec.length !== config.embeddingDim) { + throw new Error(`Expected embeddingDim=${config.embeddingDim}, got ${vec.length}`); + } + return vec; + }) as number[][]; + return arrs; + } +} diff --git a/src/competence-matcher/src/tasks/reason.ts b/src/competence-matcher/src/tasks/reason.ts new file mode 100644 index 000000000..bb171e083 --- /dev/null +++ b/src/competence-matcher/src/tasks/reason.ts @@ -0,0 +1,43 @@ +import { ollama } from '../utils/ollama'; +import { config } from '../config'; +import { MATCH_REASON as intructPrompt } from '../utils/prompts'; +import type { Message } from 'ollama'; +import { Match } from '../utils/types'; + +const { reasonModel } = config; + +export async function addReason(matches: T[], targetText: string): Promise { + if (matches.length === 0) { + return matches; // No matches to reason about + } + const reasonMatches: T[] = await Promise.all( + matches.map(async (match) => { + const messages: Message[] = [ + ...intructPrompt, + { + role: 'user', + content: `Task: ${targetText}\nCompetence: ${match.text}\nSimilarity Score: ${match.distance}`, + }, + ]; + try { + const response = await ollama.chat({ + model: reasonModel, + messages: messages, + }); + + // Extract the reason from the response + const reason = response.message.content.trim(); + return { + ...match, + reason, // Add the reason to the match + }; + } catch (error) { + console.error('Error during reasoning:', error); + // If there's an error, just keep the original match without a reason + return match; + } + }), + ); + + return reasonMatches; +} diff --git a/src/competence-matcher/src/tasks/semantic-split.ts b/src/competence-matcher/src/tasks/semantic-split.ts new file mode 100644 index 000000000..c07583a8b --- /dev/null +++ b/src/competence-matcher/src/tasks/semantic-split.ts @@ -0,0 +1,155 @@ +import { ollama } 
from '../utils/ollama'; +import { config } from '../config'; +import { SEMANTIC_SPLITTER as intructPrompt } from '../utils/prompts'; +import type { Message } from 'ollama'; +import { EmbeddingTask } from '../utils/types'; + +const { splittingModel, splittingSymbol, ollamaBatchSize } = config; + +const MIN_TEXT_LENGTH = 60; // Minimum length of text to consider for splitting (I noticed that text inputs that are too short often lead to errors in the splitting process - and since they are so small, they can be embedded directly without splitting) + +// async function ollamaChat(messages: Array<{ role: string; content: string }>) { +// const res = await fetch(`${ollamaPath}/api/chat`, { +// method: 'POST', +// headers: { 'Content-Type': 'application/json' }, +// body: JSON.stringify({ +// model: config.ollamaSplittingModel, +// messages, +// stream: false, +// }), +// }); +// if (!res.ok) { +// const t = await res.text(); +// throw new Error(`Ollama REST chat failed: ${res.status} ${t}`); +// } +// const data = (await res.json()) as { message: { content: string } }; +// return data.message.content as string; +// } + +// export async function splitSemantically(tasks: EmbeddingTask[]): Promise { +// const splittedTasks: EmbeddingTask[] = []; + +// // First, for each task, decide whether it needs splitting (filteredMessages) +// // or can be passed through as‐is. 
+// const toSplit: { task: EmbeddingTask; messages: Message[] }[] = []; +// for (const task of tasks) { +// const messages: Message[] = [...intructPrompt, { role: 'user', content: task.text }]; + +// // Filter out too‐short or empty +// const filtered = messages.filter(({ content }) => { +// const c = content.replace(/\s+/g, ' ').trim(); +// return c.length > MIN_TEXT_LENGTH; +// }); + +// if (filtered.length === 0) { +// // no splitting needed +// splittedTasks.push({ ...task, text: task.text }); +// } else { +// toSplit.push({ task, messages: filtered }); +// } +// } + +// // Now process in batches of size ollamaBatchSize +// for (let i = 0; i < toSplit.length; i += ollamaBatchSize) { +// const batch = toSplit.slice(i, i + ollamaBatchSize); + +// // Kick off all requests in this batch in parallel +// const promises = batch.map(async ({ task, messages }) => { +// try { +// const response = await ollamaChat(messages); +// const parts = response +// .replace(/\s+/g, ' ') +// .trim() +// .split(splittingSymbol) +// .map((p: string) => p.trim()) +// .filter((p: string) => p.length > 0); + +// if (parts.length === 0) { +// // fallback to original text if splitting yields nothing +// splittedTasks.push({ ...task, text: task.text }); +// } else { +// for (const part of parts) { +// splittedTasks.push({ ...task, text: part }); +// } +// } +// } catch (err) { +// console.error('Error during semantic splitting:', err); +// // in case of error, include the original +// splittedTasks.push({ ...task, text: task.text }); +// } +// }); + +// // Wait for this batch to finish before launching the next +// await Promise.all(promises); +// } + +// return splittedTasks; +// } + +// _______________________________________________ + +export async function splitSemantically(tasks: EmbeddingTask[]): Promise { + const splittedTasks: EmbeddingTask[] = []; + const toSplit: { task: EmbeddingTask; messages: Message[] }[] = []; + + for (const task of tasks) { + const messages: Message[] = [ + 
...intructPrompt, + { + role: 'user', + content: task.text, + }, + ]; + + // Filter out empty, whitespace-only and too short messages + const filteredMessages = messages.filter((message) => { + const content = message.content + // Remove excessive whitespace + .replace(/\s+/g, ' ') + // Remove potential occurences of splittingSymbol in upper and lower case + .replace(new RegExp(splittingSymbol, 'gi'), ' ') + // Trim leading and trailing whitespace + .trim(); + return content !== '' && content.length > MIN_TEXT_LENGTH; + }); + + if (filteredMessages.length === 0) { + splittedTasks.push({ ...task, text: task.text }); + } else { + toSplit.push({ task, messages: filteredMessages }); + } + } + + // Process in batches + for (let i = 0; i < toSplit.length; i += ollamaBatchSize) { + const batch = toSplit.slice(i, i + ollamaBatchSize); + + const promises = batch.map(async ({ task, messages }) => { + try { + const response = await ollama.chat({ + model: splittingModel, + messages, + }); + const parts = response.message.content + .split(splittingSymbol) + .map((part: string) => part.trim()) + .filter((part: string) => part !== ''); + + if (parts.length === 0) { + splittedTasks.push({ ...task, text: task.text }); + } else { + for (const part of parts) { + splittedTasks.push({ ...task, text: part }); + } + } + } catch (error) { + console.error('Error during semantic splitting:', error); + splittedTasks.push({ ...task, text: task.text }); + } + }); + + await Promise.all(promises); + } + + return splittedTasks; +} diff --git a/src/competence-matcher/src/tasks/semantic-zeroshot.ts b/src/competence-matcher/src/tasks/semantic-zeroshot.ts new file mode 100644 index 000000000..23556ee84 --- /dev/null +++ b/src/competence-matcher/src/tasks/semantic-zeroshot.ts @@ -0,0 +1,43 @@ +import { + PipelineType, + TextClassificationPipeline, + ZeroShotClassificationPipeline, +} from '@huggingface/transformers'; +import { config } from '../config'; +import { TransformerPipeline } from 
'../utils/model'; +import { TransformerPipelineOptions } from '../utils/types'; + +export default class ZeroShot extends TransformerPipeline { + protected static override getPipelineOptions(): TransformerPipelineOptions { + return { + task: 'zero-shot-classification' as PipelineType, + model: config.nliModel, + options: { + // progress_callback: (progress) => { + // console.log(`Embedding progress: ${progress}`); + // }, + model_file_name: 'model.onnx', + use_external_data_format: true, + local_files_only: true, + }, + }; + } + + /** + * Classify text against a set of labels using zero-shot classification. + * @param text The text to classify. + * @param labels The labels to classify against. + * @param hypothesisTemplate Optional hypothesis template for classification - should include '{}' as placeholder for label. + */ + public static async classify(text: string, labels?: string[], hypothesisTemplate?: string) { + const _labels = labels || [ + 'contradicting', + 'aligning, thus a good match', + 'neither aligning nor contradicting', + ]; + const hypothesis_template = + hypothesisTemplate || 'Task description and Skill/Capability descriptions are {}.'; + const pipe = await this.getInstance(); + return pipe(text, _labels, { hypothesis_template }); + } +} From bb6d045f102fe21b38099f8d5a31631ac3b181f4 Mon Sep 17 00:00:00 2001 From: Maxi <76120220+MaxiLein@users.noreply.github.com> Date: Tue, 29 Jul 2025 15:41:48 +0200 Subject: [PATCH 09/48] Add script to convert ONNX model weights to external data format --- .../tools/onnx-model-external-data.py | 32 +++++++++++++++++++ 1 file changed, 32 insertions(+) create mode 100644 src/competence-matcher/tools/onnx-model-external-data.py diff --git a/src/competence-matcher/tools/onnx-model-external-data.py b/src/competence-matcher/tools/onnx-model-external-data.py new file mode 100644 index 000000000..f2c74c216 --- /dev/null +++ b/src/competence-matcher/tools/onnx-model-external-data.py @@ -0,0 +1,32 @@ +import onnx +import 
argparse +import sys + +def main(): + parser = argparse.ArgumentParser(description="Convert ONNX model weights to external data format.") + parser.add_argument('--input', '-i', required=True, help='Path to the input ONNX model') + parser.add_argument('--output', '-o', required=False, help='Path to save the modified ONNX model (defaults to input path)', default=None) + + # If only -h/--help is present, show help and exit + if len(sys.argv) == 2 and sys.argv[1] in ('-h', '--help'): + parser.print_help() + sys.exit(0) + + args = parser.parse_args() + + if args.output is None: + args.output = args.input + + model = onnx.load(args.input) + + onnx.external_data_helper.convert_model_to_external_data( + model, + convert_attribute=True, + all_tensors_to_one_file=True, + location="model.onnx_data" + ) + + onnx.save(model, args.output) + +if __name__ == "__main__": + main() From a68693922fa0feb185a38cfd40613dd9bf39f81e Mon Sep 17 00:00:00 2001 From: Maxi <76120220+MaxiLein@users.noreply.github.com> Date: Thu, 31 Jul 2025 18:59:02 +0200 Subject: [PATCH 10/48] Update .gitignore to exclude competence-matcher model files and environment configuration --- .gitignore | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.gitignore b/.gitignore index d3704ec24..f59c76fe1 100644 --- a/.gitignore +++ b/.gitignore @@ -64,4 +64,5 @@ CLAUDE.md # Matching models & DB src/competence-matcher/dist src/competence-matcher/src/db/dbs -src/competence-matcher/src/models \ No newline at end of file +src/competence-matcher/src/models +src/competence-matcher/.env From 5c37c496b4b3ae7a0075d20dfad521809b94ce92 Mon Sep 17 00:00:00 2001 From: Maxi <76120220+MaxiLein@users.noreply.github.com> Date: Thu, 31 Jul 2025 22:22:56 +0200 Subject: [PATCH 11/48] Enhance development setup and configuration - Update dev script in package.json to watch for .env changes - Add dotenv dependency for environment variable management - Modify config to include ollamaBearerToken from environment variables - 
Ensure asynchronous model loading in ensureAllHuggingfaceModelsAreAvailable - Include ollamaBearerToken in Ollama instance headers --- src/competence-matcher/package.json | 3 ++- src/competence-matcher/src/config.ts | 6 +++++- src/competence-matcher/src/utils/huggingface.ts | 4 ++-- src/competence-matcher/src/utils/ollama.ts | 3 ++- 4 files changed, 11 insertions(+), 5 deletions(-) diff --git a/src/competence-matcher/package.json b/src/competence-matcher/package.json index 032b82a8d..af9e191db 100644 --- a/src/competence-matcher/package.json +++ b/src/competence-matcher/package.json @@ -4,7 +4,7 @@ "description": "Matching microservice that allows to allows to define and match on data criteria", "main": "dist/server.js", "scripts": { - "dev": "ts-node-dev --respawn --transpile-only src/server.ts", + "dev": "ts-node-dev --respawn --transpile-only --watch .env src/server.ts", "build": "tsc", "run-production": "node dist/server.js" }, @@ -24,6 +24,7 @@ "homepage": "https://github.com/PROCEED-Labs/proceed#readme", "dependencies": { "@huggingface/transformers": "^3.5.2", + "dotenv": "^17.2.1", "express": "^5.1.0", "ollama": "^0.5.16", "sqlite-vec": "^0.1.7-alpha.2" diff --git a/src/competence-matcher/src/config.ts b/src/competence-matcher/src/config.ts index 02a8a7001..e33adec02 100644 --- a/src/competence-matcher/src/config.ts +++ b/src/competence-matcher/src/config.ts @@ -1,15 +1,19 @@ +import dotenv from 'dotenv'; +dotenv.config({ path: '.env' }); + import * as os from 'node:os'; export const config = { dbPath: process.env.DB_PATH || 'src/db/dbs/', embeddingModel: process.env.EMBEDDING_MODEL || 'onnx-community/Qwen3-Embedding-0.6B-ONNX', embeddingDim: parseInt(process.env.EMBEDDING_DIM || '1024', 10), - nliModel: process.env.NLI_MODEL || './src/models/roberta_mnli_onnx', + nliModel: process.env.NLI_MODEL || 'Maxi-Lein/roberta-large-mnli-onnx', modelCache: process.env.MODEL_CACHE || 'src/models/', useGPU: process.env.USE_GPU === 'true' || false, port: 
parseInt(process.env.PORT || '8501', 10), multipleDBs: process.env.MULTIPLE_DBS === 'true' || false, ollamaPath: process.env.OLLAMA_PATH || 'http://localhost:11434', + ollamaBearerToken: process.env.OLLAMA_BEARER_TOKEN || '', ollamaBatchSize: parseInt(process.env.OLLAMA_BATCH_SIZE || '5', 10), splittingModel: process.env.SPLITTING_MODEL || 'llama3.2', reasonModel: process.env.REASON_MODEL || 'llama3.2', diff --git a/src/competence-matcher/src/utils/huggingface.ts b/src/competence-matcher/src/utils/huggingface.ts index 3d6d5128f..ceff7b42f 100644 --- a/src/competence-matcher/src/utils/huggingface.ts +++ b/src/competence-matcher/src/utils/huggingface.ts @@ -3,8 +3,8 @@ import ZeroShotSemanticOpposites from '../tasks/semantic-zeroshot'; export async function ensureAllHuggingfaceModelsAreAvailable() { try { - Embedding.getInstance(); - ZeroShotSemanticOpposites.getInstance(); + await Embedding.getInstance(); + await ZeroShotSemanticOpposites.getInstance(); } catch (error) { throw error; } diff --git a/src/competence-matcher/src/utils/ollama.ts b/src/competence-matcher/src/utils/ollama.ts index cb53af134..d47432021 100644 --- a/src/competence-matcher/src/utils/ollama.ts +++ b/src/competence-matcher/src/utils/ollama.ts @@ -1,11 +1,12 @@ import { Ollama } from 'ollama'; import { config } from '../config'; -const { ollamaPath, splittingModel, reasonModel } = config; +const { ollamaPath, splittingModel, reasonModel, ollamaBearerToken } = config; export const ollama = new Ollama({ host: ollamaPath, headers: { + Authorization: `Bearer ${ollamaBearerToken}`, // https://github.com/ollama/ollama-js?tab=readme-ov-file#user-content-custom-headers 'User-Agent': 'PROCEED Competence Matcher', }, }); From 535002012408b1fc45625d46da02cdd183bfa907 Mon Sep 17 00:00:00 2001 From: Maxi <76120220+MaxiLein@users.noreply.github.com> Date: Thu, 31 Jul 2025 22:59:00 +0200 Subject: [PATCH 12/48] Add uuid package as a dependency in package.json --- src/competence-matcher/package.json | 5 +++-- 1 
file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/competence-matcher/package.json b/src/competence-matcher/package.json index af9e191db..3487a1c03 100644 --- a/src/competence-matcher/package.json +++ b/src/competence-matcher/package.json @@ -27,7 +27,8 @@ "dotenv": "^17.2.1", "express": "^5.1.0", "ollama": "^0.5.16", - "sqlite-vec": "^0.1.7-alpha.2" + "sqlite-vec": "^0.1.7-alpha.2", + "uuid": "^11.1.0" }, "devDependencies": { "@types/express": "^5.0.2", @@ -38,4 +39,4 @@ "engines": { "node": ">=23.5.0" } -} \ No newline at end of file +} From bb66378b0528602bd7ad65ea11c052db71015c44 Mon Sep 17 00:00:00 2001 From: Maxi <76120220+MaxiLein@users.noreply.github.com> Date: Tue, 19 Aug 2025 13:09:48 +0200 Subject: [PATCH 13/48] Add .prettierignore file and and ran prettier --- .prettierignore | 4 + src/competence-matcher/openAPI.json | 1264 +++++++++++++-------------- 2 files changed, 594 insertions(+), 674 deletions(-) create mode 100644 .prettierignore diff --git a/.prettierignore b/.prettierignore new file mode 100644 index 000000000..fdf2b9577 --- /dev/null +++ b/.prettierignore @@ -0,0 +1,4 @@ +src/competence-matcher/dist/ +src/competence-matcher/src/db/dbs/ +src/competence-matcher/src/models/ +src/competence-matcher/.env \ No newline at end of file diff --git a/src/competence-matcher/openAPI.json b/src/competence-matcher/openAPI.json index dc04d3fad..10a3af685 100644 --- a/src/competence-matcher/openAPI.json +++ b/src/competence-matcher/openAPI.json @@ -1,720 +1,636 @@ { - "openapi": "3.0.3", - "info": { - "title": "Matching Server API", - "version": "0.1.0", - "description": "API for managing resource competence lists and matching tasks to resources." + "openapi": "3.0.3", + "info": { + "title": "Matching Server API", + "version": "0.1.0", + "description": "API for managing resource competence lists and matching tasks to resources." 
+ }, + "servers": [ + { + "url": "http://localhost:8501" + } + ], + "paths": { + "/": { + "get": { + "tags": ["Default"], + "summary": "Welcome endpoint", + "responses": { + "200": { + "description": "Hello World!", + "content": { + "text/plain": { + "schema": { + "type": "string" + } + } + } + } + } + } }, - "servers": [ - { - "url": "http://localhost:8501" + "/resource-competence-list": { + "get": { + "tags": ["Resources"], + "summary": "Get all resource lists (only if multipleDBs=true)", + "responses": { + "200": { + "description": "Array of resource list IDs", + "content": { + "application/json": { + "schema": { + "type": "array", + "items": { + "type": "string" + } + } + } + } + } } - ], - "paths": { - "/": { - "get": { - "tags": [ - "Default" - ], - "summary": "Welcome endpoint", - "responses": { - "200": { - "description": "Hello World!", - "content": { - "text/plain": { - "schema": { - "type": "string" - } - } - } - } + } + }, + "/resource-competence-list/jobs": { + "post": { + "tags": ["Resources"], + "summary": "Create a resource competence list job", + "requestBody": { + "required": true, + "content": { + "application/json": { + "schema": { + "type": "array", + "items": { + "$ref": "#/components/schemas/ResourceInput" } + } } + } }, - "/resource-competence-list": { - "get": { - "tags": [ - "Resources" - ], - "summary": "Get all resource lists (only if multipleDBs=true)", - "responses": { - "200": { - "description": "Array of resource list IDs", - "content": { - "application/json": { - "schema": { - "type": "array", - "items": { - "type": "string" - } - } - } - } - } + "responses": { + "202": { + "description": "Job accepted", + "headers": { + "Location": { + "schema": { + "type": "string" + } + } + }, + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/JobResponse" } + } } - }, - "/resource-competence-list/jobs": { - "post": { - "tags": [ - "Resources" - ], - "summary": "Create a resource competence list job", - 
"requestBody": { - "required": true, - "content": { - "application/json": { - "schema": { - "type": "array", - "items": { - "$ref": "#/components/schemas/ResourceInput" - } - } - } - } - }, - "responses": { - "202": { - "description": "Job accepted", - "headers": { - "Location": { - "schema": { - "type": "string" - } - } - }, - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/JobResponse" - } - } - } - } + } + } + } + }, + "/resource-competence-list/jobs/{jobId}": { + "get": { + "tags": ["Resources"], + "summary": "Get status of a resource creation job", + "parameters": [ + { + "name": "jobId", + "in": "path", + "required": true, + "schema": { + "type": "string" + } + } + ], + "responses": { + "202": { + "description": "Job pending/preprocessing/running", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/JobResponse" } + } } - }, - "/resource-competence-list/jobs/{jobId}": { - "get": { - "tags": [ - "Resources" - ], - "summary": "Get status of a resource creation job", - "parameters": [ + }, + "201": { + "description": "Completed with competenceListId", + "headers": { + "Location": { + "schema": { + "type": "string" + }, + "description": "Complete Path to created Competence-List" + } + }, + "content": { + "application/json": { + "schema": { + "allOf": [ { - "name": "jobId", - "in": "path", - "required": true, - "schema": { - "type": "string" - } - } - ], - "responses": { - "202": { - "description": "Job pending/preprocessing/running", - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/JobResponse" - } - } - } - }, - "201": { - "description": "Completed with competenceListId", - "headers": { - "Location": { - "schema": { - "type": "string" - }, - "description": "Complete Path to created Competence-List" - } - }, - "content": { - "application/json": { - "schema": { - "allOf": [ - { - "$ref": "#/components/schemas/JobResponse" - }, - { - "type": "object", - 
"properties": { - "competenceListId": { - "type": "string" - } - }, - "required": [ - "competenceListId" - ] - } - ] - } - } - } + "$ref": "#/components/schemas/JobResponse" }, - "500": { - "description": "Job failed", - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/JobResponse" - } - } - } - } - } - } - }, - "/resource-competence-list/{competenceListId}": { - "get": { - "tags": [ - "Resources" - ], - "summary": "Get a specific resource competence list", - "parameters": [ { - "name": "competenceListId", - "in": "path", - "required": true, - "schema": { - "type": "string" + "type": "object", + "properties": { + "competenceListId": { + "type": "string" } + }, + "required": ["competenceListId"] } - ], - "responses": { - "200": { - "description": "Resource list details", - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/ResourceList" - } - } - } - }, - "400": { - "description": "Missing resourceListId" - }, - "404": { - "description": "Not found" - } + ] } + } } - }, - "/matching-task-to-resource/jobs": { - "post": { - "tags": [ - "Matching" - ], - "summary": "Create a matching job", - "description": "Start a matching job by providing either an existing competenceListId or an inline competenceList.", - "requestBody": { - "required": true, - "content": { - "application/json": { - "schema": { - "oneOf": [ - { - "$ref": "#/components/schemas/MatchByListIdRequest" - }, - { - "$ref": "#/components/schemas/MatchByListRequest" - } - ] - }, - "examples": { - "Match existing list": { - "summary": "Use an existing list ID", - "value": { - "competenceListId": "123e4567-e89b-12d3-a456-426614174000", - "tasks": [ - { - "taskId": "task1", - "name": "Task 1", - "description": "This is what we currently use for matching", - "executionInstructions": "Some execution instructions", - "requiredCompetencies": [ - { - "competenceId": "comp1", - "name": "Competence 1", - "description": "Description of competence 1", - 
"externalQualificationNeeded": false, - "renewTime": 30, - "proficiencyLevel": "advanced", - "qualificationDates": [ - "2025-07-01" - ], - "lastUsages": [ - "2025-07-10T14:30:00Z" - ] - } - ] - } - ] - } - }, - "Match inline list": { - "summary": "Pass a full competenceList inline", - "value": { - "competenceList": [ - { - "resourceId": "string", - "competencies": [ - { - "competenceId": "string", - "name": "string", - "description": "string", - "externalQualificationNeeded": true, - "renewTime": 0, - "proficiencyLevel": "string", - "qualificationDates": [ - "2025-07-15" - ], - "lastUsages": [ - "2025-07-15T10:37:09.695Z" - ] - } - ] - } - ], - "tasks": [ - { - "taskId": "task1", - "name": "Task 1", - "description": "This is what we currently use for matching", - "executionInstructions": "Some execution instructions", - "requiredCompetencies": [ - { - "competenceId": "comp1", - "name": "Competence 1", - "description": "Description of competence 1", - "externalQualificationNeeded": false, - "renewTime": 30, - "proficiencyLevel": "advanced", - "qualificationDates": [ - "2025-07-01" - ], - "lastUsages": [ - "2025-07-10T14:30:00Z" - ] - } - ] - } - ] - } - } - } - } - } - }, - "responses": { - "202": { - "description": "Match job accepted", - "headers": { - "Location": { - "schema": { - "type": "string" - }, - "description": "Complete Path to created Competence-List" - } - }, - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/JobResponse" - } - } - } - } + }, + "500": { + "description": "Job failed", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/JobResponse" } + } } - }, - "/matching-task-to-resource/jobs/{jobId}": { - "get": { - "tags": [ - "Matching" - ], - "summary": "Get match job results", - "parameters": [ - { - "name": "jobId", - "in": "path", - "required": true, - "schema": { - "type": "string" - } - }, - { - "name": "rankBy", - "in": "query", - "required": false, - "schema": { - "type": 
"string", - "enum": [ - "avgFit", - "bestFit" - ] - }, - "description": "Optional ranking method: 'avgFit' or 'bestFit'" - } - ], - "responses": { - "202": { - "description": "Job pending/running", - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/JobResponse" - } - } - } - }, - "200": { - "description": "Match results", - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/GroupedMatchResults" - } - } - } - }, - "404": { - "description": "Job not found" - }, - "500": { - "description": "Job failed" - } + } + } + } + }, + "/resource-competence-list/{competenceListId}": { + "get": { + "tags": ["Resources"], + "summary": "Get a specific resource competence list", + "parameters": [ + { + "name": "competenceListId", + "in": "path", + "required": true, + "schema": { + "type": "string" + } + } + ], + "responses": { + "200": { + "description": "Resource list details", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/ResourceList" } + } } + }, + "400": { + "description": "Missing resourceListId" + }, + "404": { + "description": "Not found" + } } + } }, - "components": { - "schemas": { - "ResourceInput": { - "type": "object", - "required": [ - "resourceId", - "competencies" - ], - "properties": { - "resourceId": { - "type": "string" - }, - "competencies": { - "type": "array", - "items": { - "$ref": "#/components/schemas/CompetenceInput" - } - } + "/matching-task-to-resource/jobs": { + "post": { + "tags": ["Matching"], + "summary": "Create a matching job", + "description": "Start a matching job by providing either an existing competenceListId or an inline competenceList.", + "requestBody": { + "required": true, + "content": { + "application/json": { + "schema": { + "oneOf": [ + { + "$ref": "#/components/schemas/MatchByListIdRequest" + }, + { + "$ref": "#/components/schemas/MatchByListRequest" + } + ] + }, + "examples": { + "Match existing list": { + "summary": "Use an 
existing list ID", + "value": { + "competenceListId": "123e4567-e89b-12d3-a456-426614174000", + "tasks": [ + { + "taskId": "task1", + "name": "Task 1", + "description": "This is what we currently use for matching", + "executionInstructions": "Some execution instructions", + "requiredCompetencies": [ + { + "competenceId": "comp1", + "name": "Competence 1", + "description": "Description of competence 1", + "externalQualificationNeeded": false, + "renewTime": 30, + "proficiencyLevel": "advanced", + "qualificationDates": ["2025-07-01"], + "lastUsages": ["2025-07-10T14:30:00Z"] + } + ] + } + ] + } + }, + "Match inline list": { + "summary": "Pass a full competenceList inline", + "value": { + "competenceList": [ + { + "resourceId": "string", + "competencies": [ + { + "competenceId": "string", + "name": "string", + "description": "string", + "externalQualificationNeeded": true, + "renewTime": 0, + "proficiencyLevel": "string", + "qualificationDates": ["2025-07-15"], + "lastUsages": ["2025-07-15T10:37:09.695Z"] + } + ] + } + ], + "tasks": [ + { + "taskId": "task1", + "name": "Task 1", + "description": "This is what we currently use for matching", + "executionInstructions": "Some execution instructions", + "requiredCompetencies": [ + { + "competenceId": "comp1", + "name": "Competence 1", + "description": "Description of competence 1", + "externalQualificationNeeded": false, + "renewTime": 30, + "proficiencyLevel": "advanced", + "qualificationDates": ["2025-07-01"], + "lastUsages": ["2025-07-10T14:30:00Z"] + } + ] + } + ] + } } + } + } + } + }, + "responses": { + "202": { + "description": "Match job accepted", + "headers": { + "Location": { + "schema": { + "type": "string" + }, + "description": "Complete Path to created Competence-List" + } }, - "CompetenceInput": { - "type": "object", - "required": [ - "competenceId" - ], - "properties": { - "competenceId": { - "type": "string" - }, - "name": { - "type": "string" - }, - "description": { - "type": "string" - }, - 
"externalQualificationNeeded": { - "type": "boolean" - }, - "renewTime": { - "type": "number" - }, - "proficiencyLevel": { - "type": "string" - }, - "qualificationDates": { - "type": "array", - "items": { - "type": "string", - "format": "date" - } - }, - "lastUsages": { - "type": "array", - "items": { - "type": "string", - "format": "date-time" - } - } + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/JobResponse" } + } + } + } + } + } + }, + "/matching-task-to-resource/jobs/{jobId}": { + "get": { + "tags": ["Matching"], + "summary": "Get match job results", + "parameters": [ + { + "name": "jobId", + "in": "path", + "required": true, + "schema": { + "type": "string" + } + }, + { + "name": "rankBy", + "in": "query", + "required": false, + "schema": { + "type": "string", + "enum": ["avgFit", "bestFit"] }, - "ResourceList": { - "type": "object", - "required": [ - "competenceListId", - "resources" - ], - "properties": { - "competenceListId": { - "type": "string" - }, - "resources": { - "type": "array", - "items": { - "type": "object", - "required": [ - "resourceId", - "competencies" - ], - "properties": { - "resourceId": { - "type": "string" - }, - "competencies": { - "type": "array", - "items": { - "$ref": "#/components/schemas/CompetenceInput" - } - } - } - } - } + "description": "Optional ranking method: 'avgFit' or 'bestFit'" + } + ], + "responses": { + "202": { + "description": "Job pending/running", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/JobResponse" } - }, - "JobResponse": { - "type": "object", - "required": [ - "jobId", - "status" - ], - "properties": { - "jobId": { - "type": "string" - }, - "status": { - "type": "string" - } + } + } + }, + "200": { + "description": "Match results", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/GroupedMatchResults" } - }, - "MatchByListIdRequest": { - "type": "object", - "required": [ - "competenceListId", - 
"tasks" - ], - "properties": { - "competenceListId": { - "type": "string" - }, - "tasks": { - "type": "array", - "items": { - "$ref": "#/components/schemas/MatchingTask" - } - } + } + } + }, + "404": { + "description": "Job not found" + }, + "500": { + "description": "Job failed" + } + } + } + } + }, + "components": { + "schemas": { + "ResourceInput": { + "type": "object", + "required": ["resourceId", "competencies"], + "properties": { + "resourceId": { + "type": "string" + }, + "competencies": { + "type": "array", + "items": { + "$ref": "#/components/schemas/CompetenceInput" + } + } + } + }, + "CompetenceInput": { + "type": "object", + "required": ["competenceId"], + "properties": { + "competenceId": { + "type": "string" + }, + "name": { + "type": "string" + }, + "description": { + "type": "string" + }, + "externalQualificationNeeded": { + "type": "boolean" + }, + "renewTime": { + "type": "number" + }, + "proficiencyLevel": { + "type": "string" + }, + "qualificationDates": { + "type": "array", + "items": { + "type": "string", + "format": "date" + } + }, + "lastUsages": { + "type": "array", + "items": { + "type": "string", + "format": "date-time" + } + } + } + }, + "ResourceList": { + "type": "object", + "required": ["competenceListId", "resources"], + "properties": { + "competenceListId": { + "type": "string" + }, + "resources": { + "type": "array", + "items": { + "type": "object", + "required": ["resourceId", "competencies"], + "properties": { + "resourceId": { + "type": "string" + }, + "competencies": { + "type": "array", + "items": { + "$ref": "#/components/schemas/CompetenceInput" + } } - }, - "MatchByListRequest": { - "type": "object", - "required": [ - "competenceList", - "tasks" - ], - "properties": { - "competenceList": { - "type": "array", - "items": { - "$ref": "#/components/schemas/ResourceInput" - } - }, - "tasks": { - "type": "array", - "items": { - "$ref": "#/components/schemas/MatchingTask" - } - } + } + } + } + } + }, + "JobResponse": { + "type": 
"object", + "required": ["jobId", "status"], + "properties": { + "jobId": { + "type": "string" + }, + "status": { + "type": "string" + } + } + }, + "MatchByListIdRequest": { + "type": "object", + "required": ["competenceListId", "tasks"], + "properties": { + "competenceListId": { + "type": "string" + }, + "tasks": { + "type": "array", + "items": { + "$ref": "#/components/schemas/MatchingTask" + } + } + } + }, + "MatchByListRequest": { + "type": "object", + "required": ["competenceList", "tasks"], + "properties": { + "competenceList": { + "type": "array", + "items": { + "$ref": "#/components/schemas/ResourceInput" + } + }, + "tasks": { + "type": "array", + "items": { + "$ref": "#/components/schemas/MatchingTask" + } + } + } + }, + "MatchingTask": { + "type": "object", + "required": ["taskId"], + "properties": { + "taskId": { + "type": "string" + }, + "name": { + "type": "string" + }, + "description": { + "type": "string" + }, + "executionInstructions": { + "type": "string" + }, + "requiredCompetencies": { + "type": "array", + "items": { + "oneOf": [ + { + "type": "string" + }, + { + "$ref": "#/components/schemas/CompetenceInput" } + ] + } + } + } + }, + "GroupedMatchResults": { + "type": "object", + "required": ["taskOverview", "resourceRanking"], + "properties": { + "tasks": { + "$ref": "#/components/schemas/TaskOverview" + }, + "resourceRanking": { + "$ref": "#/components/schemas/ResourceRanking" + } + } + }, + "TaskOverview": { + "type": "array", + "items": { + "type": "object", + "required": ["taskId", "taskText"], + "properties": { + "taskId": { + "type": "string" }, - "MatchingTask": { - "type": "object", - "required": [ - "taskId" - ], - "properties": { - "taskId": { - "type": "string" - }, - "name": { - "type": "string" - }, - "description": { - "type": "string" - }, - "executionInstructions": { - "type": "string" - }, - "requiredCompetencies": { - "type": "array", - "items": { - "oneOf": [ - { - "type": "string" - }, - { - "$ref": 
"#/components/schemas/CompetenceInput" - } - ] - } - } - } + "taskText": { + "type": "string" + } + } + } + }, + "ResourceRanking": { + "type": "array", + "items": { + "type": "object", + "required": ["resourceId", "taskMatchings", "avgTaskMatchProbability"], + "properties": { + "resourceId": { + "type": "string" }, - "GroupedMatchResults": { + "taskMatchings": { + "type": "array", + "items": { "type": "object", - "required": [ - "taskOverview", - "resourceRanking" - ], + "required": ["taskId", "taskText", "competenceMatchings", "maxMatchProbability"], "properties": { - "tasks": { - "$ref": "#/components/schemas/TaskOverview" - }, - "resourceRanking": { - "$ref": "#/components/schemas/ResourceRanking" - } - } - }, - "TaskOverview": { - "type": "array", - "items": { - "type": "object", - "required": [ - "taskId", - "taskText" - ], - "properties": { - "taskId": { - "type": "string" + "taskId": { + "type": "string" + }, + "competenceMatchings": { + "type": "array", + "items": { + "type": "object", + "required": ["competenceId", "matchings", "avgMatchProbability"], + "properties": { + "competenceId": { + "type": "string" }, - "taskText": { - "type": "string" - } - } - } - }, - "ResourceRanking": { - "type": "array", - "items": { - "type": "object", - "required": [ - "resourceId", - "taskMatchings", - "avgTaskMatchProbability" - ], - "properties": { - "resourceId": { - "type": "string" - }, - "taskMatchings": { - "type": "array", - "items": { - "type": "object", - "required": [ - "taskId", - "taskText", - "competenceMatchings", - "maxMatchProbability" - ], - "properties": { - "taskId": { - "type": "string" - }, - "competenceMatchings": { - "type": "array", - "items": { - "type": "object", - "required": [ - "competenceId", - "matchings", - "avgMatchProbability" - ], - "properties": { - "competenceId": { - "type": "string" - }, - "matchings": { - "type": "array", - "items": { - "$ref": "#/components/schemas/MatchDetail" - } - }, - "avgMatchProbability": { - "type": 
"number" - }, - "avgBestFitTaskMatchProbability": { - "type": "number" - } - } - } - }, - "maxMatchProbability": { - "type": "number" - }, - "maxBestFitMatchProbability": { - "type": "number" - } - } - } + "matchings": { + "type": "array", + "items": { + "$ref": "#/components/schemas/MatchDetail" + } }, - "avgTaskMatchProbability": { - "type": "number" + "avgMatchProbability": { + "type": "number" }, "avgBestFitTaskMatchProbability": { - "type": "number" - }, - "contradicting": { - "type": "boolean" + "type": "number" } + } } + }, + "maxMatchProbability": { + "type": "number" + }, + "maxBestFitMatchProbability": { + "type": "number" + } } + } }, - "MatchDetail": { - "type": "object", - "required": [ - "text", - "type", - "matchProbability" - ], - "properties": { - "text": { - "type": "string" - }, - "type": { - "type": "string", - "enum": [ - "name", - "description", - "proficiencyLevel" - ] - }, - "matchProbability": { - "type": "number" - }, - "alignment": { - "type": "string", - "enum": [ - "aligning", - "neutral", - "contradicting" - ] - }, - "reason": { - "type": "string" - } - } + "avgTaskMatchProbability": { + "type": "number" + }, + "avgBestFitTaskMatchProbability": { + "type": "number" + }, + "contradicting": { + "type": "boolean" } + } + } + }, + "MatchDetail": { + "type": "object", + "required": ["text", "type", "matchProbability"], + "properties": { + "text": { + "type": "string" + }, + "type": { + "type": "string", + "enum": ["name", "description", "proficiencyLevel"] + }, + "matchProbability": { + "type": "number" + }, + "alignment": { + "type": "string", + "enum": ["aligning", "neutral", "contradicting"] + }, + "reason": { + "type": "string" + } } + } } -} \ No newline at end of file + } +} From 9a5ebb4e13e339fd2fdb164ec7df57df92830508 Mon Sep 17 00:00:00 2001 From: Maxi <76120220+MaxiLein@users.noreply.github.com> Date: Tue, 19 Aug 2025 22:38:12 +0200 Subject: [PATCH 14/48] feat: Enhance error handling and logging across the competence matcher 
service - Introduced custom error classes for better error context and handling. - Updated middleware to handle database errors and validation errors gracefully. - Improved logging for worker management, model initialization, and semantic splitting tasks. - Added verbose logging options to provide detailed runtime information. - Refactored resource retrieval functions to throw specific errors for better debugging. - Enhanced the reasoning and semantic splitting tasks with detailed error logging. - Implemented error handling in worker management to capture and log worker failures. - Updated the server initialisation process to handle model availability checks with error handling. --- .gitignore | 1 + src/competence-matcher/src/config.ts | 5 + .../src/middleware/error-handler.ts | 52 ++ .../src/middleware/logging.ts | 182 ++++- .../src/middleware/match.ts | 631 +++++++++++------- .../src/middleware/resource.ts | 81 ++- src/competence-matcher/src/server.ts | 115 ++-- src/competence-matcher/src/tasks/reason.ts | 43 +- .../src/tasks/semantic-split.ts | 154 ++--- src/competence-matcher/src/utils/errors.ts | 161 +++++ .../src/utils/huggingface.ts | 30 +- src/competence-matcher/src/utils/model.ts | 4 +- src/competence-matcher/src/utils/ollama.ts | 75 ++- src/competence-matcher/src/utils/types.ts | 19 + .../src/worker/worker-manager.ts | 336 ++++++++-- 15 files changed, 1391 insertions(+), 498 deletions(-) create mode 100644 src/competence-matcher/src/middleware/error-handler.ts create mode 100644 src/competence-matcher/src/utils/errors.ts diff --git a/.gitignore b/.gitignore index f59c76fe1..731f93588 100644 --- a/.gitignore +++ b/.gitignore @@ -66,3 +66,4 @@ src/competence-matcher/dist src/competence-matcher/src/db/dbs src/competence-matcher/src/models src/competence-matcher/.env +src/competence-matcher/logs diff --git a/src/competence-matcher/src/config.ts b/src/competence-matcher/src/config.ts index e33adec02..e9f4d0fea 100644 --- a/src/competence-matcher/src/config.ts 
+++ b/src/competence-matcher/src/config.ts @@ -2,6 +2,7 @@ import dotenv from 'dotenv'; dotenv.config({ path: '.env' }); import * as os from 'node:os'; +import path from 'node:path'; export const config = { dbPath: process.env.DB_PATH || 'src/db/dbs/', @@ -16,8 +17,12 @@ export const config = { ollamaBearerToken: process.env.OLLAMA_BEARER_TOKEN || '', ollamaBatchSize: parseInt(process.env.OLLAMA_BATCH_SIZE || '5', 10), splittingModel: process.env.SPLITTING_MODEL || 'llama3.2', + splittingLength: parseInt(process.env.SPLITTING_LENGTH || '1000', 10), // Set this to 0 to disable splitting reasonModel: process.env.REASON_MODEL || 'llama3.2', splittingSymbol: process.env.SPLITTING_SYMBOL || 'SPLITTING_SYMBOL', maxWorkerThreads: parseInt(process.env.NUMBER_OF_THREADS || String(os.cpus().length - 1), 10), // -1 for main thread maxJobTime: parseInt(process.env.MAX_JOB_TIME || '600', 10) * 1_000, // converted from seconds to milliseconds + verbose: process.env.VERBOSE === 'true' || false, + logDir: process.env.LOG_DIR || path.join(process.cwd(), 'logs'), + logFile: process.env.LOG_FILE || path.join(process.cwd(), 'logs', 'competence-matcher.log'), }; diff --git a/src/competence-matcher/src/middleware/error-handler.ts b/src/competence-matcher/src/middleware/error-handler.ts new file mode 100644 index 000000000..026c5fe09 --- /dev/null +++ b/src/competence-matcher/src/middleware/error-handler.ts @@ -0,0 +1,52 @@ +import { Request, Response, NextFunction } from 'express'; +import { CompetenceMatcherError } from '../utils/errors'; +import { logError } from './logging'; + +/** + * Central error handler middleware + * Catches all errors and provides consistent error responses + */ +export function errorHandler( + error: Error | CompetenceMatcherError, + req: Request, + res: Response, + next: NextFunction, +): void { + const requestId = (req as any).requestId || 'unknown'; + + if (error instanceof CompetenceMatcherError) { + // Handle our custom errors + logError(error, 
error.context, requestId, { + statusCode: error.statusCode, + details: error.details, + path: req.path, + method: req.method, + }); + + res.status(error.statusCode).json({ + error: { + message: error.message, + context: error.context, + requestId: error.requestId || requestId, + ...(error.details && { details: error.details }), + }, + }); + } else { + // Handle unexpected errors + logError(error, 'unhandled_error', requestId, { + path: req.path, + method: req.method, + body: req.body, + query: req.query, + params: req.params, + }); + + res.status(500).json({ + error: { + message: 'An unexpected error occurred', + context: 'internal_server_error', + requestId: requestId, + }, + }); + } +} diff --git a/src/competence-matcher/src/middleware/logging.ts b/src/competence-matcher/src/middleware/logging.ts index af93af303..2f7d2ee0e 100644 --- a/src/competence-matcher/src/middleware/logging.ts +++ b/src/competence-matcher/src/middleware/logging.ts @@ -1,17 +1,181 @@ import { Request, Response, NextFunction } from 'express'; +import * as fs from 'node:fs'; +import * as path from 'node:path'; +import { config } from '../config'; +import { randomUUID } from 'node:crypto'; +import { LogEntry } from '../utils/types'; + +const { verbose, logDir, logFile } = config; + +const today = new Date().toISOString().split('T')[0]; // Format: YYYY-MM-DD +const logFilePath = `${logFile.replace('.log', '')}-${today}.log`; + +// Create logs directory if it doesn't exist +if (!fs.existsSync(logDir)) { + fs.mkdirSync(logDir, { recursive: true }); +} + +// Generate unique request ID +function generateRequestId(): string { + return `req_${Date.now()}_${randomUUID()}`; +} + +// Write log entry to file +function writeLogToFile(logEntry: LogEntry): void { + const logLine = JSON.stringify(logEntry) + '\n'; + fs.appendFileSync(logFilePath, logLine, 'utf8'); +} + +// Log error with context +export function logError( + error: Error | string, + context: string, + requestId?: string, + additionalData?: 
any, +): void { + const errorMessage = error instanceof Error ? error.message : String(error); + const errorStack = error instanceof Error ? error.stack : undefined; + + const logEntry: LogEntry = { + timestamp: new Date().toISOString(), + requestId: requestId || 'system', + type: 'error', + error: errorMessage, + errorStack, + context, + ...additionalData, + }; + + // Console logging if verbose + if (verbose) { + console.error(`[${context}] Error:`, errorMessage); + if (errorStack && verbose) { + console.error('Stack trace:', errorStack); + } + if (additionalData) { + console.error('Additional data:', additionalData); + } + } + + // Write to log file + writeLogToFile(logEntry); +} + +// Enhanced request logger middleware export function requestLogger(req: Request, res: Response, next: NextFunction): void { - const { method, query, body, headers, params } = req; - const logData = { - time: new Date().toISOString(), - method, + const requestId = generateRequestId(); + const startTime = Date.now(); + + // Add requestId to request for later use + (req as any).requestId = requestId; + + const requestLogEntry: LogEntry = { + timestamp: new Date().toISOString(), + requestId, + type: 'request', + method: req.method, path: req.path, - query: JSON.stringify(query, null, 2), - body, - headers: JSON.stringify(headers, null, 2), - params: JSON.stringify(params, null, 2), + query: req.query, + body: req.body, + headers: req.headers, + params: req.params, ip: req.ip, + realIp: req.headers['x-real-ip'] || req.headers['x-forwarded-for'] || req.ip, }; - console.table([logData]); + + // Console logging if verbose + if (verbose) { + console.table([ + { + timestamp: requestLogEntry.timestamp, + requestId: requestLogEntry.requestId, + type: requestLogEntry.type, + method: requestLogEntry.method, + path: requestLogEntry.path, + query: + typeof requestLogEntry.query === 'object' + ? 
JSON.stringify(requestLogEntry.query, null, 2) + : requestLogEntry.query, + headers: + typeof requestLogEntry.headers === 'object' + ? JSON.stringify(requestLogEntry.headers, null, 2) + : requestLogEntry.headers, + params: + typeof requestLogEntry.params === 'object' + ? JSON.stringify(requestLogEntry.params, null, 2) + : requestLogEntry.params, + ip: requestLogEntry.ip, + realIp: + typeof requestLogEntry.realIp === 'object' + ? JSON.stringify(requestLogEntry.realIp, null, 2) + : requestLogEntry.realIp, + }, + ]); + } + + // Write request to log file + writeLogToFile(requestLogEntry); + + // Override res.json to capture response - this is not needed as we will override res.send which appears to be used by res.json internally + // This is commented out to avoid double logging + // const originalJson = res.json; + // res.json = function (obj) { + // const responseTime = Date.now() - startTime; + + // const responseLogEntry: LogEntry = { + // timestamp: new Date().toISOString(), + // requestId, + // type: 'response', + // statusCode: res.statusCode, + // responseTime, + // body: obj, + // }; + + // // Console logging if verbose + // if (verbose) { + // console.table([responseLogEntry]); + // } + + // // Write response to log file + // writeLogToFile(responseLogEntry); + + // return originalJson.call(this, obj); + // }; + + // Override res.send to capture response + const originalSend = res.send; + res.send = function (body) { + const responseTime = Date.now() - startTime; + + const responseLogEntry: LogEntry = { + timestamp: new Date().toISOString(), + requestId, + type: 'response', + statusCode: res.statusCode, + responseTime, + body: body, + }; + + // Console logging if verbose + if (verbose) { + console.table([ + { + timestamp: responseLogEntry.timestamp, + requestId: responseLogEntry.requestId, + type: responseLogEntry.type, + statusCode: responseLogEntry.statusCode, + responseTime: responseLogEntry.responseTime, + // body: responseLogEntry.body, + }, + ]); + } + 
+ // Write response to log file + writeLogToFile(responseLogEntry); + + return originalSend.call(this, body); + }; + next(); } diff --git a/src/competence-matcher/src/middleware/match.ts b/src/competence-matcher/src/middleware/match.ts index 86c44bb56..cebf3e3ab 100644 --- a/src/competence-matcher/src/middleware/match.ts +++ b/src/competence-matcher/src/middleware/match.ts @@ -12,16 +12,25 @@ import { TaskOverview, } from '../utils/types'; import { handleCreateResourceList } from './resource'; +import { + ValidationError, + ResourceNotFoundError, + DatabaseError, + CompetenceMatcherError, +} from '../utils/errors'; +import { logError } from './logging'; export function matchCompetenceList(req: Request, res: Response, next: NextFunction): void { + const requestId = (req as any).requestId; + try { - let listId: string; - let list: ResourceListInput; - let taskInput: MatchingTask[]; + let listId: string | undefined; + let list: ResourceListInput | undefined; + let taskInput: MatchingTask[] | undefined; const db = getDB(req.dbName!); /**-------------------------------------------- - * Checks + * Input Validation *---------------------------------------------*/ if ('competenceList' in req.body) { // Handle case where competenceList is provided @@ -41,52 +50,84 @@ export function matchCompetenceList(req: Request, res: Response, next: NextFunct taskInput = tasks; } - if (!listId! && !list!) { - res.status(400).json({ - error: 'Either competenceListId or competenceList must be provided.', - }); - return; + // Validate input presence + if (!listId && !list) { + throw new ValidationError( + 'Either competenceListId or competenceList must be provided', + 'competenceListId|competenceList', + { competenceListId: listId, competenceList: list }, + requestId, + ); } - if (!taskInput! 
|| !Array.isArray(taskInput) || taskInput?.length === 0) { - res.status(400).json({ - error: 'An array of tasks must be provided for matching.', - }); - return; + // Validate tasks input + if (!taskInput || !Array.isArray(taskInput) || taskInput.length === 0) { + throw new ValidationError( + 'Must provide a non-empty array of tasks for matching', + 'tasks', + taskInput, + requestId, + ); } - if (listId! && !(typeof listId === 'string')) { - res.status(400).json({ - error: 'competenceListId must be an UUIDStrings.', - }); - return; - } else if ( - list! && + // Validate competenceListId format + if (listId && typeof listId !== 'string') { + throw new ValidationError( + 'Must be a valid UUID string', + 'competenceListId', + listId, + requestId, + ); + } + + // Validate competenceList structure + if ( + list && (!Array.isArray(list) || !list.every( (entry) => typeof entry === 'object' && !Array.isArray(entry) && entry !== null, )) ) { - res.status(400).json({ - error: 'competenceList must be an array of ResourceInput objects.', - }); - return; + throw new ValidationError( + 'Must be an array of ResourceInput objects', + 'competenceList', + list, + requestId, + ); } /**-------------------------------------------- * Case existing competenceListId was passed *---------------------------------------------*/ - if (listId!) { - // Check if the competence list exists - const competenceLists = db.getAvailableResourceLists(); + if (listId) { + let competenceLists: string[]; + + try { + // Check if the competence list exists + competenceLists = db.getAvailableResourceLists(); + } catch (error) { + throw new DatabaseError( + 'getAvailableResourceLists', + error instanceof Error ? 
error : new Error(String(error)), + requestId, + ); + } + if (!competenceLists.includes(listId)) { - res.status(404).json({ - error: `Competence list with ID ${listId} not found.`, - }); - return; + throw new ResourceNotFoundError('Competence list', listId, requestId); + } + + let jobId: string; + try { + jobId = db.createJob(listId); + } catch (error) { + throw new DatabaseError( + 'createJob', + error instanceof Error ? error : new Error(String(error)), + requestId, + ); } - const jobId = db.createJob(listId); const job: MatchingJob = { jobId, dbName: req.dbName!, @@ -116,7 +157,17 @@ export function matchCompetenceList(req: Request, res: Response, next: NextFunct }), }; - workerManager.enqueue(job, 'matcher'); + try { + workerManager.enqueue(job, 'matcher'); + } catch (error) { + throw new CompetenceMatcherError( + `Failed to enqueue matching job: ${error instanceof Error ? error.message : String(error)}`, + 'job_enqueue', + 500, + requestId, + { jobId, listId }, + ); + } // Respond with jobId in location header res @@ -130,28 +181,74 @@ export function matchCompetenceList(req: Request, res: Response, next: NextFunct /**-------------------------------------------- * Case new Competence-List was passed *---------------------------------------------*/ - // Create a new competence list - const matchingJobId = db.createJob(); - if (list!) { - db.updateJobStatus(matchingJobId, 'preprocessing'); + let matchingJobId: string; + try { + // Create a new competence list + matchingJobId = db.createJob(); + } catch (error) { + throw new DatabaseError( + 'createJob', + error instanceof Error ? error : new Error(String(error)), + requestId, + ); + } + + if (list) { + try { + db.updateJobStatus(matchingJobId, 'preprocessing'); + } catch (error) { + throw new DatabaseError( + 'updateJobStatus', + error instanceof Error ? 
error : new Error(String(error)), + requestId, + ); + } + handleCreateResourceList(req.dbName!, list, (job, code, jobId) => { try { // Embedding fails -> no matching possible (i.e. fail the matching job) if (code !== 0) { - db.updateJobStatus(matchingJobId, 'failed'); + try { + db.updateJobStatus(matchingJobId, 'failed'); + } catch (error) { + logError( + new DatabaseError( + 'updateJobStatus', + error instanceof Error ? error : new Error(String(error)), + requestId, + ), + 'inline_job_failure_update', + requestId, + ); + } + return; + } + + try { + db.updateJobStatus(matchingJobId, 'pending'); + } catch (error) { + logError( + new DatabaseError( + 'updateJobStatus', + error instanceof Error ? error : new Error(String(error)), + requestId, + ), + 'inline_job_pending_update', + requestId, + ); return; } - db.updateJobStatus(matchingJobId, 'pending'); // Retrieve the competence list ID const { referenceId: listId } = db.getJob(jobId); + // Create the matching job const matchingJob: MatchingJob = { jobId: matchingJobId, dbName: req.dbName!, listId, resourceId: undefined, // For now, we don't support matching against a single resource - tasks: taskInput.map((task) => { + tasks: taskInput!.map((task) => { return { taskId: task.taskId, name: task.name, @@ -174,11 +271,35 @@ export function matchCompetenceList(req: Request, res: Response, next: NextFunct }; }), }; + // Enqueue the matching job workerManager.enqueue(matchingJob, 'matcher'); } catch (error) { - db.updateJobStatus(matchingJobId, 'failed'); - console.error('Error creating (inline) matching job:', error); + try { + db.updateJobStatus(matchingJobId, 'failed'); + } catch (dbError) { + logError( + new DatabaseError( + 'updateJobStatus', + dbError instanceof Error ? dbError : new Error(String(dbError)), + requestId, + ), + 'inline_job_error_update', + requestId, + ); + } + + logError( + new CompetenceMatcherError( + `Failed to create inline matching job: ${error instanceof Error ? 
error.message : String(error)}`, + 'inline_job_creation', + 500, + requestId, + { matchingJobId }, + ), + 'inline_job_creation', + requestId, + ); } }); @@ -188,219 +309,273 @@ export function matchCompetenceList(req: Request, res: Response, next: NextFunct .json({ jobId: matchingJobId, status: 'pending' }); } } catch (error) { - console.error('Error matching:', error); - res.status(500).json({ error: 'Internal Server Error' }); + // Pass error to error handler middleware + next(error); } } export function getMatchJobResults(req: Request, res: Response, next: NextFunction): void { - // Get jobId from path - const { jobId } = req.params; - // Get sorter from query params - const requestedSorter = req.query.rankBy as string | undefined; - const sorter = requestedSorter == 'bestFit' ? 'bestFit' : 'avgFit'; // Default to avgFit - const db = getDB(req.dbName!); - - // Check if job exists - const job = db.getJob(jobId); - if (!job) { - res.status(404).json({ error: `Job with ID ${jobId} not found.` }); - return; - } + const requestId = (req as any).requestId; - // Job can be pending, preprocessing, running, completed, or failed - switch (job.status) { - case 'pending': - case 'running': - case 'preprocessing': - res.status(202).json({ + try { + // Get jobId from path + const { jobId } = req.params; + // Get sorter from query params + const requestedSorter = req.query.rankBy as string | undefined; + const sorter = requestedSorter == 'bestFit' ? 
'bestFit' : 'avgFit'; // Default to avgFit + + if (!jobId) { + throw new ValidationError( + 'Job ID is required in the request path', + 'jobId', jobId, - status: job.status, - }); - return; - case 'failed': - res.status(500).json({ - error: `Job with ID ${jobId} failed.`, - }); - return; - case 'completed': - // Proceed to return results below - break; - default: - console.error(`Unexpected job status: ${job.status} for jobId: ${jobId}`); - res.status(500).json({ - error: `Job with ID ${jobId} failed.`, - }); - return; - } + requestId, + ); + } - // Return match results - const results = db.getMatchResults(jobId); + let db; + try { + db = getDB(req.dbName!); + } catch (error) { + throw new DatabaseError( + 'getDB', + error instanceof Error ? error : new Error(String(error)), + requestId, + ); + } - const tasks: TaskOverview = results.reduce((acc, result) => { - const { taskId, taskText } = result; - // Check if task already exists in the overview - if (!acc.some((task) => task.taskId === taskId)) { - acc.push({ taskId, taskText }); + // Check if job exists + let job; + try { + job = db.getJob(jobId); + } catch (error) { + throw new DatabaseError( + 'getJob', + error instanceof Error ? 
error : new Error(String(error)), + requestId, + ); } - return acc; - }, [] as TaskOverview); - - // Get the structure of the results - let groupedResults: ResourceRanking = results.reduce((acc, result) => { - const { taskId, competenceId, resourceId, distance, text, type, alignment, reason } = result; - - // resourceId - let resourceGroup = acc.find((group) => group.resourceId === resourceId); - if (!resourceGroup) { - resourceGroup = { - resourceId, - taskMatchings: [], - avgTaskMatchProbability: 0, - avgBestFitTaskMatchProbability: 0, - contradicting: false, - }; - acc.push(resourceGroup); + + if (!job) { + throw new ResourceNotFoundError('Job', jobId, requestId); } - // taskMatchings - let taskMatches = resourceGroup.taskMatchings.find((task) => task.taskId === taskId); - if (!taskMatches) { - taskMatches = { - taskId, - competenceMatchings: [], - maxMatchProbability: 0, - maxBestFitMatchProbability: 0, - }; - resourceGroup.taskMatchings.push(taskMatches); + + // Job can be pending, preprocessing, running, completed, or failed + switch (job.status) { + case 'pending': + case 'running': + case 'preprocessing': + res.status(202).json({ + jobId, + status: job.status, + }); + return; + case 'failed': + throw new CompetenceMatcherError( + `Job with ID '${jobId}' has failed during processing`, + 'job_execution_failed', + 500, + requestId, + { jobId, jobStatus: job.status }, + ); + case 'completed': + // Proceed to return results below + break; + default: + throw new CompetenceMatcherError( + `Job with ID '${jobId}' has unexpected status: ${job.status}`, + 'unexpected_job_status', + 500, + requestId, + { jobId, jobStatus: job.status }, + ); } - // competenceMatchings - let competenceMatches = taskMatches.competenceMatchings.find( - (competence) => competence.competenceId === competenceId, - ); - if (!competenceMatches) { - competenceMatches = { - competenceId, - matchings: [], - avgMatchProbability: 0, - avgBestFitMatchProbability: 0, - }; - 
taskMatches.competenceMatchings.push(competenceMatches); + // Return match results + let results; + try { + results = db.getMatchResults(jobId); + } catch (error) { + throw new DatabaseError( + 'getMatchResults', + error instanceof Error ? error : new Error(String(error)), + requestId, + ); } - // Add the match to competenceMatches - competenceMatches.matchings.push({ - text, - type: type as 'name' | 'description' | 'proficiencyLevel', - matchProbability: distance, - alignment: alignment as 'contradicting' | 'neutral' | 'aligning', - reason: reason || undefined, - }); - - return acc; - }, [] as ResourceRanking); - - // Aggregate and sort - groupedResults = groupedResults - .map((resourceGroup) => { - const { resourceId, taskMatchings, avgTaskMatchProbability, avgBestFitTaskMatchProbability } = - resourceGroup; - - const newTaskMatchings = taskMatchings.map((taskGroup) => { - const { taskId, competenceMatchings, maxMatchProbability, maxBestFitMatchProbability } = - taskGroup; - - const newCompetenceMatchings = competenceMatchings.map((competenceGroup) => { - const { competenceId, matchings, avgMatchProbability, avgBestFitMatchProbability } = - competenceGroup; - - // Calculate average match probability for this competence (i.e. 
avg over all parts of this competence) - const totalMatchProbability = matchings.reduce( - (sum, match) => sum + match.matchProbability, - 0, - ); + const tasks: TaskOverview = results.reduce((acc, result) => { + const { taskId, taskText } = result; + // Check if task already exists in the overview + if (!acc.some((task) => task.taskId === taskId)) { + acc.push({ taskId, taskText }); + } + return acc; + }, [] as TaskOverview); + + // Get the structure of the results + let groupedResults: ResourceRanking = results.reduce((acc, result) => { + const { taskId, competenceId, resourceId, distance, text, type, alignment, reason } = result; + + // resourceId + let resourceGroup = acc.find((group) => group.resourceId === resourceId); + if (!resourceGroup) { + resourceGroup = { + resourceId, + taskMatchings: [], + avgTaskMatchProbability: 0, + avgBestFitTaskMatchProbability: 0, + contradicting: false, + }; + acc.push(resourceGroup); + } + // taskMatchings + let taskMatches = resourceGroup.taskMatchings.find((task) => task.taskId === taskId); + if (!taskMatches) { + taskMatches = { + taskId, + competenceMatchings: [], + maxMatchProbability: 0, + maxBestFitMatchProbability: 0, + }; + resourceGroup.taskMatchings.push(taskMatches); + } - let numberOfBestFits = 0; - const totalBestFitMatchProbability = matchings.reduce((sum, match) => { - if (match.alignment === 'aligning') { - numberOfBestFits++; - return sum + match.matchProbability; - } - return sum; - }, 0); + // competenceMatchings + let competenceMatches = taskMatches.competenceMatchings.find( + (competence) => competence.competenceId === competenceId, + ); + if (!competenceMatches) { + competenceMatches = { + competenceId, + matchings: [], + avgMatchProbability: 0, + avgBestFitMatchProbability: 0, + }; + taskMatches.competenceMatchings.push(competenceMatches); + } + + // Add the match to competenceMatches + competenceMatches.matchings.push({ + text, + type: type as 'name' | 'description' | 'proficiencyLevel', + 
matchProbability: distance, + alignment: alignment as 'contradicting' | 'neutral' | 'aligning', + reason: reason || undefined, + }); + + return acc; + }, [] as ResourceRanking); + + // Aggregate and sort + groupedResults = groupedResults + .map((resourceGroup) => { + const { + resourceId, + taskMatchings, + avgTaskMatchProbability, + avgBestFitTaskMatchProbability, + } = resourceGroup; + + const newTaskMatchings = taskMatchings.map((taskGroup) => { + const { taskId, competenceMatchings, maxMatchProbability, maxBestFitMatchProbability } = + taskGroup; + + const newCompetenceMatchings = competenceMatchings.map((competenceGroup) => { + const { competenceId, matchings, avgMatchProbability, avgBestFitMatchProbability } = + competenceGroup; + + // Calculate average match probability for this competence (i.e. avg over all parts of this competence) + const totalMatchProbability = matchings.reduce( + (sum, match) => sum + match.matchProbability, + 0, + ); + + let numberOfBestFits = 0; + const totalBestFitMatchProbability = matchings.reduce((sum, match) => { + if (match.alignment === 'aligning') { + numberOfBestFits++; + return sum + match.matchProbability; + } + return sum; + }, 0); + + // Return sorted + return { + competenceId, + matchings: matchings.sort((a, b) => b.matchProbability - a.matchProbability), + avgMatchProbability: totalMatchProbability / matchings.length, + avgBestFitMatchProbability: + numberOfBestFits > 0 ? totalBestFitMatchProbability / numberOfBestFits : 0, // If no best fit, set to 0 + }; + }); // Return sorted return { - competenceId, - matchings: matchings.sort((a, b) => b.matchProbability - a.matchProbability), - avgMatchProbability: totalMatchProbability / matchings.length, - avgBestFitMatchProbability: - numberOfBestFits > 0 ? totalBestFitMatchProbability / numberOfBestFits : 0, // If no best fit, set to 0 + taskId, + competenceMatchings: newCompetenceMatchings.sort((a, b) => { + const key = + sorter === 'bestFit' ? 
'avgBestFitMatchProbability' : 'avgMatchProbability'; + return b[key] - a[key]; + }), + maxMatchProbability: Math.max( + ...newCompetenceMatchings.map((c) => c.avgMatchProbability), + ), + maxBestFitMatchProbability: Math.max( + ...newCompetenceMatchings.map((c) => c.avgBestFitMatchProbability), + ), }; }); + // Calculate average task match probability for this resource + const totalTaskMatchProbability = newTaskMatchings.reduce( + (sum, task) => sum + task.maxMatchProbability, + 0, + ); + const totalBestFitTaskMatchProbability = newTaskMatchings.reduce( + (sum, task) => sum + task.maxBestFitMatchProbability, + 0, + ); + // Return sorted return { - taskId, - competenceMatchings: newCompetenceMatchings.sort((a, b) => { - const key = sorter === 'bestFit' ? 'avgBestFitMatchProbability' : 'avgMatchProbability'; + resourceId, + taskMatchings: newTaskMatchings.sort((a, b) => { + const key = sorter === 'bestFit' ? 'maxBestFitMatchProbability' : 'maxMatchProbability'; return b[key] - a[key]; }), - maxMatchProbability: Math.max( - ...newCompetenceMatchings.map((c) => c.avgMatchProbability), - ), - maxBestFitMatchProbability: Math.max( - ...newCompetenceMatchings.map((c) => c.avgBestFitMatchProbability), + avgTaskMatchProbability: totalTaskMatchProbability / newTaskMatchings.length, + avgBestFitTaskMatchProbability: + totalBestFitTaskMatchProbability / newTaskMatchings.length || 0, // If no best fit, set to 0 + contradicting: newTaskMatchings.some((task) => + task.competenceMatchings.some((competence) => + competence.matchings.some((match) => match.alignment === 'contradicting'), + ), ), }; + }) + .sort((a, b) => { + const key = + sorter === 'bestFit' ? 'avgBestFitTaskMatchProbability' : 'avgTaskMatchProbability'; + + // Sort in two levels: Contradicting, key + // First not contradicting resources, then contradicting ones + // Case one is contradicting, the other is not + if (a.contradicting !== b.contradicting) { + return a.contradicting ? 
1 : -1; // Non-contradicting first + } + // Both are contradicting or both are not + // Sort by the key + return b[key] - a[key]; }); - // Calculate average task match probability for this resource - const totalTaskMatchProbability = newTaskMatchings.reduce( - (sum, task) => sum + task.maxMatchProbability, - 0, - ); - const totalBestFitTaskMatchProbability = newTaskMatchings.reduce( - (sum, task) => sum + task.maxBestFitMatchProbability, - 0, - ); + const load: GroupedMatchResults = { + tasks, + resourceRanking: groupedResults, + }; - // Return sorted - return { - resourceId, - taskMatchings: newTaskMatchings.sort((a, b) => { - const key = sorter === 'bestFit' ? 'maxBestFitMatchProbability' : 'maxMatchProbability'; - return b[key] - a[key]; - }), - avgTaskMatchProbability: totalTaskMatchProbability / newTaskMatchings.length, - avgBestFitTaskMatchProbability: - totalBestFitTaskMatchProbability / newTaskMatchings.length || 0, // If no best fit, set to 0 - contradicting: newTaskMatchings.some((task) => - task.competenceMatchings.some((competence) => - competence.matchings.some((match) => match.alignment === 'contradicting'), - ), - ), - }; - }) - .sort((a, b) => { - const key = - sorter === 'bestFit' ? 'avgBestFitTaskMatchProbability' : 'avgTaskMatchProbability'; - - // Sort in two levels: Contradicting, key - // First not contradicting resources, then contradicting ones - // Case one is contradicting, the other is not - if (a.contradicting !== b.contradicting) { - return a.contradicting ? 
1 : -1; // Non-contradicting first - } - // Both are contradicting or both are not - // Sort by the key - return b[key] - a[key]; - }); - - const load: GroupedMatchResults = { - tasks, - resourceRanking: groupedResults, - }; - - res.status(200).json(load); + res.status(200).json(load); + } catch (error) { + // Pass error to error handler middleware + next(error); + } } diff --git a/src/competence-matcher/src/middleware/resource.ts b/src/competence-matcher/src/middleware/resource.ts index cf5d9848e..59afe7ce4 100644 --- a/src/competence-matcher/src/middleware/resource.ts +++ b/src/competence-matcher/src/middleware/resource.ts @@ -4,59 +4,82 @@ import { getDB } from '../utils/db'; import workerManager from '../worker/worker-manager'; import { splitSemantically } from '../tasks/semantic-split'; import { CompetenceInput, EmbeddingJob, EmbeddingTask, ResourceInput } from '../utils/types'; +import { + ValidationError, + ResourceNotFoundError, + DatabaseError, + CompetenceMatcherError, +} from '../utils/errors'; +import { logError } from './logging'; export function getResourceLists(req: Request, res: Response, next: NextFunction): void { + const requestId = (req as any).requestId; + try { - const db = getDB(req.dbName!); + let db; + try { + db = getDB(req.dbName!); + } catch (error) { + throw new DatabaseError( + 'getDB', + error instanceof Error ? error : new Error(String(error)), + requestId, + ); + } - const availableResourceLists = db.getAvailableResourceLists(); + let availableResourceLists; + try { + availableResourceLists = db.getAvailableResourceLists(); + } catch (error) { + throw new DatabaseError( + 'getAvailableResourceLists', + error instanceof Error ? 
error : new Error(String(error)), + requestId, + ); + } - res.status(200).json(availableResourceLists); // string[] + res.status(200).json(availableResourceLists); } catch (error) { - console.error('Error retrieving resource lists:', error); - res.status(500).json({ error: 'Internal Server Error' }); + next(error); } } export function getResourceList(req: Request, res: Response, next: NextFunction): void { + const requestId = (req as any).requestId; + try { - const db = getDB(req.dbName!); const resourceListId = req.params.resourceListId; if (!resourceListId) { - res.status(400).json({ error: 'Resource list ID is required' }); - return; + throw new ValidationError( + 'Resource list ID is required in the request parameters', + 'resourceListId', + resourceListId, + requestId, + ); + } + + let db; + try { + db = getDB(req.dbName!); + } catch (error) { + throw new DatabaseError( + 'getDB', + error instanceof Error ? error : new Error(String(error)), + requestId, + ); } let resourceList; try { resourceList = db.getResourceList(resourceListId); } catch (error) { - res.status(404).json({ error: 'Resource list not found' }); - return; + throw new ResourceNotFoundError('Resource list', resourceListId, requestId); } res.status(200).json(resourceList); - // type: - // resourceList: { - // competenceListId: string; - // resources: Array<{ - // resourceId: string; - // competencies: Array<{ - // competenceId: string; - // name?: string; - // description?: string; - // externalQualificationNeeded: boolean; - // renewTime?: number; - // proficiencyLevel?: string; - // qualificationDates: string[]; - // lastUsages: string[]; - // }>; - // }>; - // } } catch (error) { - console.error('Error retrieving resource list:', error); - res.status(500).json({ error: 'Internal Server Error' }); + next(error); } } diff --git a/src/competence-matcher/src/server.ts b/src/competence-matcher/src/server.ts index 158a5faac..8d8e05c9a 100644 --- a/src/competence-matcher/src/server.ts +++ 
b/src/competence-matcher/src/server.ts @@ -5,14 +5,17 @@ import MatchRouter from './routes/match'; import { config } from './config'; import { dbHeader } from './middleware/db-locator'; import { requestLogger } from './middleware/logging'; +import { errorHandler } from './middleware/error-handler'; import Embedding from './tasks/embedding'; import { ensureAllOllamaModelsAreAvailable } from './utils/ollama'; import { splitSemantically } from './tasks/semantic-split'; import { createWorker } from './utils/worker'; import { ensureAllHuggingfaceModelsAreAvailable } from './utils/huggingface'; import { EmbeddingTask } from './utils/types'; +import { CompetenceMatcherError } from './utils/errors'; +import { logError } from './middleware/logging'; -const { port: PORT } = config; +const { port: PORT, verbose } = config; export const PATHS = { resource: '/resource-competence-list', @@ -29,58 +32,43 @@ declare module 'express-serve-static-core' { async function main() { const app = express(); - // Ensure all required models are available - // Hugging Face models - await ensureAllHuggingfaceModelsAreAvailable(); - // Ollama models - await ensureAllOllamaModelsAreAvailable(); - - // const tasks = [ - // { - // listId: 'test-list', - // resourceId: 'test-resource', - // competenceId: 'test-competence', - // text: 'This competence covers the principles and best practices of designing scalable software systems. It includes high-level architecture, component interaction, and trade-off analysis. Practitioners will need to balance performance, reliability, and maintainability when making design decisions.', - // type: 'description', - // }, - // { - // listId: 'test-list', - // resourceId: 'test-resource', - // competenceId: 'test-competence', - // text: 'This competence focuses on building and maintaining RESTful and GraphQL APIs. It covers endpoint design, versioning strategies, and error handling. 
Learners will gain hands-on experience with request validation, authentication, and performance tuning.', - // type: 'description', - // }, - // { - // listId: 'test-list', - // resourceId: 'test-resource', - // competenceId: 'test-competence', - // text: 'This competence entails designing effective database schemas to represent business domains. It involves normalization, denormalization, and indexing strategies for optimal query performance. Real-world scenarios will illustrate when to choose relational versus NoSQL approaches.', - // type: 'description', - // }, - // { - // listId: 'test-list', - // resourceId: 'test-resource', - // competenceId: 'test-competence', - // text: 'This competence covers fundamental security principles for web applications. Topics include authentication, authorization, encryption, and secure configuration management. Practical exercises demonstrate common vulnerabilities and how to mitigate them effectively.', - // type: 'description', - // }, - // { - // listId: 'test-list', - // resourceId: 'test-resource', - // competenceId: 'test-competence', - // text: "This person can not swim at all. 
Please don't let them close water at all.", - // type: 'description', - // }, - // ] as EmbeddingTask[]; - - // const testworker = createWorker('test'); - // testworker.on('message', (message) => { - // console.log(message); - // }); - // testworker.postMessage(tasks); - - // const result = await splitSemantically(tasks); - // console.log(result); + try { + if (verbose) { + console.log('[Server] Initialising competence matcher service...'); + } + + // Ensure all required models are available + // Hugging Face models + if (verbose) { + console.log('[Server] Checking HuggingFace models availability...'); + } + await ensureAllHuggingfaceModelsAreAvailable(); + + // Ollama models + if (verbose) { + console.log('[Server] Checking Ollama models availability...'); + } + await ensureAllOllamaModelsAreAvailable(); + + if (verbose) { + console.log('[Server] All required models are available'); + } + } catch (error) { + const initError = new CompetenceMatcherError( + `Failed to initialise required models: ${error instanceof Error ? error.message : String(error)}`, + 'server_initialisation', + 503, + undefined, + { + stage: 'model_initialisation', + originalError: error instanceof Error ? 
error.message : String(error), + }, + ); + + logError(initError, 'server_startup_failure'); + console.error('[Server] Failed to start due to model initialisation error'); + process.exit(1); + } // Parse JSON app.use(express.json()); @@ -89,11 +77,10 @@ async function main() { // Middleware to handle database header app.use(dbHeader); // Logging middleware - // app.use(requestLogger); + app.use(requestLogger); // Hello World app.get('/', (req, res, next) => { - console.log('Received a GET request on /'); res.status(200).send('Welcome to the Matching Server'); }); @@ -101,12 +88,26 @@ async function main() { app.use(PATHS.resource, ResourceRouter); app.use(PATHS.match, MatchRouter); + // Error handler middleware (must be last, only invoked if error occurs) + app.use(errorHandler); + app.listen(PORT, () => { - console.log(`Matching-Server is running on http://localhost:${PORT}`); + if (verbose) { + console.log(`[Server] Matching-Server is running on http://localhost:${PORT}`); + } }); } main().catch((error) => { - console.error('Server shutdown due to error:', error); + const startupError = new CompetenceMatcherError( + `Server startup failed: ${error instanceof Error ? error.message : String(error)}`, + 'server_startup', + 500, + undefined, + { originalError: error instanceof Error ? 
error.message : String(error) }, + ); + + logError(startupError, 'server_startup_failure'); + console.error('[Server] Server shutdown due to startup error:', error); process.exit(1); }); diff --git a/src/competence-matcher/src/tasks/reason.ts b/src/competence-matcher/src/tasks/reason.ts index bb171e083..ff1cba017 100644 --- a/src/competence-matcher/src/tasks/reason.ts +++ b/src/competence-matcher/src/tasks/reason.ts @@ -3,15 +3,24 @@ import { config } from '../config'; import { MATCH_REASON as intructPrompt } from '../utils/prompts'; import type { Message } from 'ollama'; import { Match } from '../utils/types'; +import { ReasoningError, OllamaConnectionError } from '../utils/errors'; +import { logError } from '../middleware/logging'; -const { reasonModel } = config; +const { reasonModel, verbose } = config; export async function addReason(matches: T[], targetText: string): Promise { if (matches.length === 0) { return matches; // No matches to reason about } + + if (verbose) { + console.log( + `[Reasoning] Adding reasons for ${matches.length} matches using model: ${reasonModel}`, + ); + } + const reasonMatches: T[] = await Promise.all( - matches.map(async (match) => { + matches.map(async (match, index) => { const messages: Message[] = [ ...intructPrompt, { @@ -19,6 +28,7 @@ export async function addReason(matches: T[], targetText: strin content: `Task: ${targetText}\nCompetence: ${match.text}\nSimilarity Score: ${match.distance}`, }, ]; + try { const response = await ollama.chat({ model: reasonModel, @@ -27,17 +37,44 @@ export async function addReason(matches: T[], targetText: strin // Extract the reason from the response const reason = response.message.content.trim(); + + if (verbose) { + console.log(`[Reasoning] Generated reason for match ${index + 1}/${matches.length}`); + } + return { ...match, reason, // Add the reason to the match }; } catch (error) { - console.error('Error during reasoning:', error); + const reasoningError = new ReasoningError( + 1, // Single 
match reasoning failure + error instanceof Error ? error : new Error(String(error)), + ); + + logError(reasoningError, 'reasoning_single_match_failure', undefined, { + matchIndex: index, + totalMatches: matches.length, + targetTextLength: targetText.length, + matchText: match.text.substring(0, 100) + (match.text.length > 100 ? '...' : ''), + similarity: match.distance, + reasonModel, + }); + // If there's an error, just keep the original match without a reason return match; } }), ); + if (verbose) { + const successfulReasons = reasonMatches.filter( + (match) => 'reason' in match && match.reason, + ).length; + console.log( + `[Reasoning] Completed: ${successfulReasons}/${matches.length} matches received reasons`, + ); + } + return reasonMatches; } diff --git a/src/competence-matcher/src/tasks/semantic-split.ts b/src/competence-matcher/src/tasks/semantic-split.ts index c07583a8b..e05677a26 100644 --- a/src/competence-matcher/src/tasks/semantic-split.ts +++ b/src/competence-matcher/src/tasks/semantic-split.ts @@ -3,95 +3,25 @@ import { config } from '../config'; import { SEMANTIC_SPLITTER as intructPrompt } from '../utils/prompts'; import type { Message } from 'ollama'; import { EmbeddingTask } from '../utils/types'; +import { SemanticSplittingError, OllamaConnectionError } from '../utils/errors'; +import { logError } from '../middleware/logging'; -const { splittingModel, splittingSymbol, ollamaBatchSize } = config; - -const MIN_TEXT_LENGTH = 60; // Minimum length of text to consider for splitting (I noticed that text inputs that are too short often lead to errors in the splitting process - and since they are so small, they can be embedded directly without splitting) - -// async function ollamaChat(messages: Array<{ role: string; content: string }>) { -// const res = await fetch(`${ollamaPath}/api/chat`, { -// method: 'POST', -// headers: { 'Content-Type': 'application/json' }, -// body: JSON.stringify({ -// model: config.ollamaSplittingModel, -// messages, -// 
stream: false, -// }), -// }); -// if (!res.ok) { -// const t = await res.text(); -// throw new Error(`Ollama REST chat failed: ${res.status} ${t}`); -// } -// const data = (await res.json()) as { message: { content: string } }; -// return data.message.content as string; -// } - -// export async function splitSemantically(tasks: EmbeddingTask[]): Promise { -// const splittedTasks: EmbeddingTask[] = []; - -// // First, for each task, decide whether it needs splitting (filteredMessages) -// // or can be passed through as‐is. -// const toSplit: { task: EmbeddingTask; messages: Message[] }[] = []; -// for (const task of tasks) { -// const messages: Message[] = [...intructPrompt, { role: 'user', content: task.text }]; - -// // Filter out too‐short or empty -// const filtered = messages.filter(({ content }) => { -// const c = content.replace(/\s+/g, ' ').trim(); -// return c.length > MIN_TEXT_LENGTH; -// }); - -// if (filtered.length === 0) { -// // no splitting needed -// splittedTasks.push({ ...task, text: task.text }); -// } else { -// toSplit.push({ task, messages: filtered }); -// } -// } - -// // Now process in batches of size ollamaBatchSize -// for (let i = 0; i < toSplit.length; i += ollamaBatchSize) { -// const batch = toSplit.slice(i, i + ollamaBatchSize); - -// // Kick off all requests in this batch in parallel -// const promises = batch.map(async ({ task, messages }) => { -// try { -// const response = await ollamaChat(messages); -// const parts = response -// .replace(/\s+/g, ' ') -// .trim() -// .split(splittingSymbol) -// .map((p: string) => p.trim()) -// .filter((p: string) => p.length > 0); - -// if (parts.length === 0) { -// // fallback to original text if splitting yields nothing -// splittedTasks.push({ ...task, text: task.text }); -// } else { -// for (const part of parts) { -// splittedTasks.push({ ...task, text: part }); -// } -// } -// } catch (err) { -// console.error('Error during semantic splitting:', err); -// // in case of error, include the 
original -// splittedTasks.push({ ...task, text: task.text }); -// } -// }); - -// // Wait for this batch to finish before launching the next -// await Promise.all(promises); -// } - -// return splittedTasks; -// } - -// _______________________________________________ +const { + splittingModel, + splittingSymbol, + ollamaBatchSize, + splittingLength: MIN_TEXT_LENGTH, + verbose, +} = config; export async function splitSemantically(tasks: EmbeddingTask[]): Promise { const splittedTasks: EmbeddingTask[] = []; const toSplit: { task: EmbeddingTask; messages: Message[] }[] = []; + if (verbose) { + console.log(`[Semantic Split] Processing ${tasks.length} tasks`); + } + for (const task of tasks) { const messages: Message[] = [ ...intructPrompt, @@ -113,42 +43,94 @@ export async function splitSemantically(tasks: EmbeddingTask[]): Promise MIN_TEXT_LENGTH; }); - if (filteredMessages.length === 0) { + if (filteredMessages.length === 0 || MIN_TEXT_LENGTH === 0) { splittedTasks.push({ ...task, text: task.text }); } else { toSplit.push({ task, messages: filteredMessages }); } } + if (verbose) { + console.log(`[Semantic Split] ${toSplit.length} tasks require splitting`); + } + // Process in batches for (let i = 0; i < toSplit.length; i += ollamaBatchSize) { const batch = toSplit.slice(i, i + ollamaBatchSize); + if (verbose) { + console.log( + `[Semantic Split] Processing batch ${Math.floor(i / ollamaBatchSize) + 1}/${Math.ceil(toSplit.length / ollamaBatchSize)} (${batch.length} tasks)`, + ); + } + const promises = batch.map(async ({ task, messages }) => { try { const response = await ollama.chat({ model: splittingModel, messages, }); + const parts = response.message.content .split(splittingSymbol) .map((part: string) => part.trim()) .filter((part: string) => part !== ''); if (parts.length === 0) { + if (verbose) { + console.warn( + `[Semantic Split] No valid parts found for task ${task.listId}/${task.resourceId}/${task.competenceId}, using original text`, + ); + } 
splittedTasks.push({ ...task, text: task.text }); } else { + if (verbose) { + console.log( + `[Semantic Split] Split task ${task.listId}/${task.resourceId}/${task.competenceId} into ${parts.length} parts`, + ); + } for (const part of parts) { splittedTasks.push({ ...task, text: part }); } } } catch (error) { - console.error('Error during semantic splitting:', error); + const semanticError = new SemanticSplittingError( + task.text.length, + error instanceof Error ? error : new Error(String(error)), + ); + + logError(semanticError, 'semantic_splitting_task_failure', undefined, { + taskId: `${task.listId}/${task.resourceId}/${task.competenceId}`, + textLength: task.text.length, + splittingModel, + }); + + // Fallback to original text splittedTasks.push({ ...task, text: task.text }); } }); - await Promise.all(promises); + try { + await Promise.all(promises); + } catch (error) { + // This shouldn't happen since we catch errors in individual promises, + // but just in case there's an unexpected Promise.all failure + logError( + new SemanticSplittingError( + batch.length, + error instanceof Error ? 
error : new Error(String(error)), + ), + 'semantic_splitting_batch_failure', + undefined, + { batchSize: batch.length, batchIndex: Math.floor(i / ollamaBatchSize) }, + ); + } + } + + if (verbose) { + console.log( + `[Semantic Split] Completed: ${tasks.length} input tasks → ${splittedTasks.length} output tasks`, + ); } return splittedTasks; diff --git a/src/competence-matcher/src/utils/errors.ts b/src/competence-matcher/src/utils/errors.ts new file mode 100644 index 000000000..6e81ec5e7 --- /dev/null +++ b/src/competence-matcher/src/utils/errors.ts @@ -0,0 +1,161 @@ +/** + * Custom error classes for the competence matcher service + * These provide descriptive error messages with context + */ + +export class CompetenceMatcherError extends Error { + public readonly context: string; + public readonly statusCode: number; + public readonly requestId?: string; + public readonly details?: any; + + constructor( + message: string, + context: string, + statusCode: number = 500, + requestId?: string, + details?: any, + ) { + super(message); + this.name = this.constructor.name; + this.context = context; + this.statusCode = statusCode; + this.requestId = requestId; + this.details = details; + + // Maintains proper stack trace for where our error was thrown + Error.captureStackTrace(this, this.constructor); + } + + toJSON() { + return { + name: this.name, + message: this.message, + context: this.context, + statusCode: this.statusCode, + requestId: this.requestId, + details: this.details, + stack: this.stack, + }; + } +} + +export class ValidationError extends CompetenceMatcherError { + constructor(message: string, field: string, value: any, requestId?: string) { + super( + `Validation failed for field '${field}': ${message}`, + 'input_validation', + 400, + requestId, + { field, value }, + ); + } +} + +export class DatabaseError extends CompetenceMatcherError { + constructor(operation: string, error: Error, requestId?: string) { + super( + `Database operation '${operation}' 
failed: ${error.message}`, + 'database_operation', + 500, + requestId, + { operation, originalError: error.message }, + ); + } +} + +export class ModelError extends CompetenceMatcherError { + constructor(modelName: string, operation: string, error: Error, requestId?: string) { + super( + `Model '${modelName}' operation '${operation}' failed: ${error.message}`, + 'model_operation', + 500, + requestId, + { modelName, operation, originalError: error.message }, + ); + } +} + +export class ResourceNotFoundError extends CompetenceMatcherError { + constructor(resourceType: string, resourceId: string, requestId?: string) { + super( + `${resourceType} with ID '${resourceId}' was not found`, + 'resource_not_found', + 404, + requestId, + { resourceType, resourceId }, + ); + } +} + +export class WorkerError extends CompetenceMatcherError { + constructor(workerType: string, jobId: string, error: Error, requestId?: string) { + super( + `Worker '${workerType}' failed for job '${jobId}': ${error.message}`, + 'worker_execution', + 500, + requestId, + { workerType, jobId, originalError: error.message }, + ); + } +} + +export class SemanticSplittingError extends CompetenceMatcherError { + constructor(textLength: number, error: Error, requestId?: string) { + super( + `Semantic splitting failed for text of length ${textLength}: ${error.message}`, + 'semantic_splitting', + 500, + requestId, + { textLength, originalError: error.message }, + ); + } +} + +export class ReasoningError extends CompetenceMatcherError { + constructor(matchCount: number, error: Error, requestId?: string) { + super( + `Reasoning process failed for ${matchCount} matches: ${error.message}`, + 'reasoning', + 500, + requestId, + { matchCount, originalError: error.message }, + ); + } +} + +export class EmbeddingError extends CompetenceMatcherError { + constructor(modelName: string, taskCount: number, error: Error, requestId?: string) { + super( + `Embedding generation with model '${modelName}' failed for ${taskCount} 
tasks: ${error.message}`, + 'embedding_generation', + 500, + requestId, + { modelName, taskCount, originalError: error.message }, + ); + } +} + +export class OllamaConnectionError extends CompetenceMatcherError { + constructor(host: string, operation: string, error: Error, requestId?: string) { + super( + `Failed to connect to Ollama at '${host}' for operation '${operation}': ${error.message}`, + 'ollama_connection', + 503, + requestId, + { host, operation, originalError: error.message }, + ); + } +} + +export class HuggingFaceModelError extends CompetenceMatcherError { + constructor(modelName: string, operation: string, error: Error, requestId?: string) { + super( + `HuggingFace model '${modelName}' operation '${operation}' failed: ${error.message}`, + 'huggingface_model', + 500, + requestId, + { modelName, operation, originalError: error.message }, + ); + } +} diff --git a/src/competence-matcher/src/utils/huggingface.ts b/src/competence-matcher/src/utils/huggingface.ts index ceff7b42f..401597745 100644 --- a/src/competence-matcher/src/utils/huggingface.ts +++ b/src/competence-matcher/src/utils/huggingface.ts @@ -1,13 +1,37 @@ import Embedding from '../tasks/embedding'; import ZeroShotSemanticOpposites from '../tasks/semantic-zeroshot'; +import { HuggingFaceModelError } from './errors'; +import { config } from '../config'; + +const { verbose } = config; export async function ensureAllHuggingfaceModelsAreAvailable() { + if (verbose) { + console.log('[HuggingFace] Checking availability of required models...'); + } + try { + if (verbose) { + console.log('[HuggingFace] Initialising embedding model...'); + } await Embedding.getInstance(); + + if (verbose) { + console.log('[HuggingFace] Initialising zero-shot semantic opposites model...'); + } await ZeroShotSemanticOpposites.getInstance(); + + if (verbose) { + console.log('[HuggingFace] All models initialised successfully'); + } } catch (error) { - throw error; + throw new HuggingFaceModelError( + 'unknown', // We don't 
know which specific model failed - will maybe add later + 'initialisation', + error instanceof Error ? error : new Error(String(error)), + ); + } + if (verbose) { + console.log('[HuggingFace] All required HuggingFace-Models are available'); } - - console.log('All required Hugging Face models are available.'); } diff --git a/src/competence-matcher/src/utils/model.ts b/src/competence-matcher/src/utils/model.ts index 885c6fce4..31635ea16 100644 --- a/src/competence-matcher/src/utils/model.ts +++ b/src/competence-matcher/src/utils/model.ts @@ -53,7 +53,9 @@ export abstract class TransformerPipeline { // mark it as loaded and log on first load if (!this.loaded && isMainThread) { - console.log(`${model} (${task}) is ready`); + if (config.verbose) { + console.log(`[Model-Pipeline] ${model} (${task}) is ready`); + } this.loaded = true; } } diff --git a/src/competence-matcher/src/utils/ollama.ts b/src/competence-matcher/src/utils/ollama.ts index d47432021..089bc3f08 100644 --- a/src/competence-matcher/src/utils/ollama.ts +++ b/src/competence-matcher/src/utils/ollama.ts @@ -1,7 +1,8 @@ import { Ollama } from 'ollama'; import { config } from '../config'; +import { OllamaConnectionError } from './errors'; -const { ollamaPath, splittingModel, reasonModel, ollamaBearerToken } = config; +const { ollamaPath, splittingModel, reasonModel, ollamaBearerToken, verbose } = config; export const ollama = new Ollama({ host: ollamaPath, @@ -20,24 +21,72 @@ export const ollama = new Ollama({ export async function ensureAllOllamaModelsAreAvailable() { const models = [splittingModel, reasonModel]; - const availableModels = (await ollama.list()).models.map((model) => model.model); + if (verbose) { + console.log(`[Ollama] Checking availability of models: ${models.join(', ')}`); + } + + let availableModels: string[]; + try { + const modelList = await ollama.list(); + availableModels = modelList.models.map((model) => model.model); + + if (verbose) { + console.log(`[Ollama] Available models: 
${availableModels.join(', ')}`); + } + } catch (error) { + throw new OllamaConnectionError( + ollamaPath, + 'list_models', + error instanceof Error ? error : new Error(String(error)), + ); + } for (const model of models) { if (!availableModels.includes(model)) { - const modelpull = await ollama.pull({ - model, - insecure: false, - stream: false, - }); - - // Check if the model was successfully pulled - if (!modelpull || modelpull.status !== 'success') { - throw new Error( - `Model ${model} could not be pulled: ${modelpull?.status || 'Unknown error'}`, + if (verbose) { + console.log(`[Ollama] Model '${model}' not found, attempting to pull...`); + } + + try { + const modelpull = await ollama.pull({ + model, + insecure: false, + stream: false, + }); + + // Check if the model was successfully pulled + if (!modelpull || modelpull.status !== 'success') { + throw new OllamaConnectionError( + ollamaPath, + 'pull_model', + new Error(`Model pull failed: ${modelpull?.status || 'Unknown error'}`), + ); + } + + if (verbose) { + console.log(`[Ollama] Successfully pulled model '${model}'`); + } + } catch (error) { + // If the pull takes too long and the ollama is behind a proxy, it can timeout (504 as response code) + // In this case, we just recheck the model availability + // TODO: + if (error instanceof OllamaConnectionError) { + throw error; + } + throw new OllamaConnectionError( + ollamaPath, + 'pull_model', + error instanceof Error ? 
error : new Error(String(error)), ); } + } else { + if (verbose) { + console.log(`[Ollama] Model '${model}' is available (already downloaded)`); + } } } - console.log('All required models are available in ollama.'); + if (verbose) { + console.log('[Ollama] All required Ollama-Models models are available'); + } } diff --git a/src/competence-matcher/src/utils/types.ts b/src/competence-matcher/src/utils/types.ts index fb51ca786..98bd306b3 100644 --- a/src/competence-matcher/src/utils/types.ts +++ b/src/competence-matcher/src/utils/types.ts @@ -144,3 +144,22 @@ export interface TransformerPipelineOptions { model: string; options?: PretrainedModelOptions; } + +export interface LogEntry { + timestamp: string; + requestId: string; + type: 'request' | 'response' | 'error'; + method?: string; + path?: string; + query?: object; + body?: any; + headers?: object; + params?: object; + ip?: string; + realIp?: string | string[]; + statusCode?: number; + responseTime?: number; + error?: string; + errorStack?: string; + context?: string; +} diff --git a/src/competence-matcher/src/worker/worker-manager.ts b/src/competence-matcher/src/worker/worker-manager.ts index 0bb7c1a00..b765dcce0 100644 --- a/src/competence-matcher/src/worker/worker-manager.ts +++ b/src/competence-matcher/src/worker/worker-manager.ts @@ -5,6 +5,10 @@ import { splitSemantically } from '../tasks/semantic-split'; import { Match, WorkerQueue, workerTypes } from '../utils/types'; import { addReason } from '../tasks/reason'; import { getDB } from '../utils/db'; +import { WorkerError, DatabaseError, ReasoningError } from '../utils/errors'; +import { logError } from '../middleware/logging'; + +const { verbose, maxWorkerThreads } = config; class WorkerManager { private concurrency: number; @@ -20,6 +24,13 @@ class WorkerManager { */ public enqueue(job: any, workerScript: workerTypes, options: WorkerQueue['options'] = {}) { this.queue.push({ job, workerScript, options }); + + if (verbose) { + console.log( + 
`[WorkerManager] Enqueued ${workerScript} job ${job.jobId || 'unknown'} (queue: ${this.queue.length}, active: ${this.active.size})`, + ); + } + this.dispatch(); } @@ -33,109 +44,296 @@ class WorkerManager { /** Spawn one worker, hook up its lifecycle, and send the job */ private startWorker(job: any, workerScript: workerTypes, options: WorkerQueue['options']) { - const worker = createWorker(workerScript); + const jobId = job.jobId || 'unknown'; + + let worker: Worker; + try { + worker = createWorker(workerScript); + } catch (error) { + const workerError = new WorkerError( + workerScript, + jobId, + error instanceof Error ? error : new Error(String(error)), + ); + + logError(workerError, 'worker_creation_failure', undefined, { + workerScript, + jobId, + queueLength: this.queue.length, + activeWorkers: this.active.size, + }); + + options?.onError?.(job, workerError); + return; + } this.active.add(worker); worker.once('online', () => { - // console.log(`[WorkerManager] Worker for ${workerScript} started`); - worker.postMessage(job); + if (verbose) { + console.log(`[WorkerManager] Worker for ${workerScript} job ${jobId} started`); + } + + try { + worker.postMessage(job); + options?.onOnline?.(job); + } catch (error) { + const messageError = new WorkerError( + workerScript, + jobId, + error instanceof Error ? 
error : new Error(String(error)), + ); + + logError(messageError, 'worker_message_send_failure', undefined, { + workerScript, + jobId, + }); - options?.onOnline?.(job); + this.active.delete(worker); + worker.terminate(); + this.dispatch(); + options?.onError?.(job, messageError); + } }); // When the worker exits (success or failure), remove from active set & dispatch next worker.once('exit', (code) => { this.active.delete(worker); + if (code === 1) { - console.error(`[WorkerManager] ${workerScript} exited (failed) with code`, code); + const exitError = new WorkerError( + workerScript, + jobId, + new Error(`Worker exited with failure code: ${code}`), + ); + + logError(exitError, 'worker_exit_failure', undefined, { + workerScript, + jobId, + exitCode: code, + }); } else if (code === 0) { - // console.log(`[WorkerManager] ${workerScript} exited successfully`); + if (verbose) { + console.log( + `[WorkerManager] Worker for ${workerScript} job ${jobId} completed successfully`, + ); + } } else if (code === 2) { - console.error(`[WorkerManager] ${workerScript} timed out`); + const timeoutError = new WorkerError( + workerScript, + jobId, + new Error('Worker timed out during execution'), + ); + + logError(timeoutError, 'worker_timeout', undefined, { + workerScript, + jobId, + exitCode: code, + }); + } else { + const unexpectedExitError = new WorkerError( + workerScript, + jobId, + new Error(`Worker exited with unexpected code: ${code}`), + ); + + logError(unexpectedExitError, 'worker_unexpected_exit', undefined, { + workerScript, + jobId, + exitCode: code, + }); } - this.dispatch(); + this.dispatch(); options?.onExit?.(job, code); }); worker.once('error', (err) => { - console.error(`[WorkerManager] ${workerScript} error:`, err); + const workerError = new WorkerError( + workerScript, + jobId, + err instanceof Error ? 
err : new Error(String(err)), + ); + + logError(workerError, 'worker_runtime_error', undefined, { + workerScript, + jobId, + }); - options?.onError?.(job, err); + options?.onError?.(job, workerError); }); worker.on('message', async (message) => { - switch (message.type) { - case 'status': - console.log(`[WorkerManager] Worker for job ${message.jobId} status:`, message.status); - break; - case 'error': - console.error(`[WorkerManager] Worker for job ${message.jobId} error:`, message.error); - break; - case 'log': - console.log(`[WorkerManager] Worker for job ${message.jobId} log:`, message.message); - break; - - // Workaround for adding reasoning before saving in DB - case 'job': - switch (message.job) { - case 'reason': - await handleReasoning(job, message); - break; - } - break; + try { + switch (message.type) { + case 'status': + if (verbose) { + console.log( + `[WorkerManager] Worker for job ${message.jobId} status: ${message.status}`, + ); + } + break; + case 'error': + logError( + new WorkerError(workerScript, message.jobId || jobId, new Error(message.error)), + 'worker_reported_error', + undefined, + { workerScript, jobId: message.jobId || jobId, reportedError: message.error }, + ); + break; + case 'log': + if (verbose) { + console.log( + `[WorkerManager] Worker for job ${message.jobId} log: ${message.message}`, + ); + } + break; + + // Workaround for adding reasoning before saving in DB + case 'job': + switch (message.job) { + case 'reason': + await handleReasoning(job, message); + break; + } + break; + } + options?.onMessage?.(job, message); + } catch (error) { + const messageHandlingError = new WorkerError( + workerScript, + jobId, + error instanceof Error ? 
error : new Error(String(error)), + ); + + logError(messageHandlingError, 'worker_message_handling_error', undefined, { + workerScript, + jobId, + messageType: message.type, + }); } - options?.onMessage?.(job, message); }); } } async function handleReasoning(job: any, message: any) { - const finalMatches = []; - // Add reasoning before saving in DB - for (const [task, matches] of Object.entries(message.workload)) { - const taskMatches = await addReason< - Match & { - taskId: string; - taskText: string; - type: 'name' | 'description' | 'proficiencyLevel'; - alignment: 'contradicting' | 'neutral' | 'aligning'; + const jobId = job.jobId || 'unknown'; + + try { + if (verbose) { + console.log(`[WorkerManager] Processing reasoning for job ${jobId}`); + } + + const finalMatches = []; + + // Add reasoning before saving in DB + for (const [task, matches] of Object.entries(message.workload)) { + try { + const taskMatches = await addReason< + Match & { + taskId: string; + taskText: string; + type: 'name' | 'description' | 'proficiencyLevel'; + alignment: 'contradicting' | 'neutral' | 'aligning'; + } + >( + matches as (Match & { + taskId: string; + taskText: string; + type: 'name' | 'description' | 'proficiencyLevel'; + alignment: 'contradicting' | 'neutral' | 'aligning'; + })[], + task, + ); + finalMatches.push(...taskMatches); + } catch (error) { + const reasoningError = new ReasoningError( + (matches as any[]).length, + error instanceof Error ? error : new Error(String(error)), + ); + + logError(reasoningError, 'reasoning_task_failure', undefined, { + jobId, + task: task.substring(0, 100) + (task.length > 100 ? '...' 
: ''), + matchCount: (matches as any[]).length, + }); + + // Continue with original matches without reasoning + finalMatches.push(...(matches as any[])); } - >( - matches as (Match & { - taskId: string; - taskText: string; - type: 'name' | 'description' | 'proficiencyLevel'; - alignment: 'contradicting' | 'neutral' | 'aligning'; - })[], - task, + } + + // Save in DB + let db; + try { + db = getDB(job.dbName); + } catch (error) { + throw new DatabaseError('getDB', error instanceof Error ? error : new Error(String(error))); + } + + for (const match of finalMatches) { + try { + db.addMatchResult({ + jobId: job.jobId, + taskId: match.taskId, + taskText: match.taskText, + competenceId: match.competenceId, + resourceId: match.resourceId, + distance: match.distance, + text: match.text, + type: match.type, + reason: match.reason, + alignment: match.alignment, + }); + } catch (error) { + throw new DatabaseError( + 'addMatchResult', + error instanceof Error ? error : new Error(String(error)), + ); + } + } + + // Update job status + try { + db.updateJobStatus(job.jobId, 'completed'); + + if (verbose) { + console.log( + `[WorkerManager] Job ${jobId} completed successfully with ${finalMatches.length} matches`, + ); + } + } catch (error) { + throw new DatabaseError( + 'updateJobStatus', + error instanceof Error ? error : new Error(String(error)), + ); + } + } catch (error) { + logError( + error instanceof Error ? 
error : new Error(String(error)), + 'reasoning_handler_failure', + undefined, + { jobId }, ); - finalMatches.push(...taskMatches); - } - // Save in DB - const db = getDB(job.dbName); - - for (const match of finalMatches) { - db.addMatchResult({ - jobId: job.jobId, - taskId: match.taskId, - taskText: match.taskText, - competenceId: match.competenceId, - resourceId: match.resourceId, - distance: match.distance, - text: match.text, - type: match.type, - reason: match.reason, - alignment: match.alignment, - }); + // Try to mark job as failed + try { + const db = getDB(job.dbName); + db.updateJobStatus(job.jobId, 'failed'); + } catch (dbError) { + logError( + new DatabaseError( + 'updateJobStatus', + dbError instanceof Error ? dbError : new Error(String(dbError)), + ), + 'job_failure_update_error', + undefined, + { jobId }, + ); + } } - - // Update job status - db.updateJobStatus(job.jobId, 'completed'); } // export a singleton instance -const manager = new WorkerManager(config.maxWorkerThreads); +const manager = new WorkerManager(maxWorkerThreads); export default manager; From d193865ad47cf1c81fe947ee0df4b6ed4e34c96b Mon Sep 17 00:00:00 2001 From: Maxi <76120220+MaxiLein@users.noreply.github.com> Date: Wed, 20 Aug 2025 21:46:10 +0200 Subject: [PATCH 15/48] feat: Enhance worker management and configuration for embedding and matching tasks - Updated default batch size for Ollama from 5 to 20. - Introduced new configuration options for embedding and matching workers. - Improved error handling and logging in worker processes. - Refactored worker manager to support static worker pools for embedding and matching tasks. - Added health check mechanism for worker responsiveness. - Implemented job processing logic to handle multiple tasks efficiently. - Enhanced logging for better traceability of worker actions and statuses. 
--- src/competence-matcher/src/config.ts | 6 +- .../src/middleware/resource.ts | 2 +- src/competence-matcher/src/server.ts | 19 +- src/competence-matcher/src/utils/worker.ts | 4 +- src/competence-matcher/src/worker/embedder.ts | 115 ++- .../src/worker/matcher-new.ts | 0 src/competence-matcher/src/worker/matcher.ts | 309 +++++--- .../src/worker/worker-manager.ts | 678 ++++++++++++++---- 8 files changed, 867 insertions(+), 266 deletions(-) create mode 100644 src/competence-matcher/src/worker/matcher-new.ts diff --git a/src/competence-matcher/src/config.ts b/src/competence-matcher/src/config.ts index e9f4d0fea..b46b08387 100644 --- a/src/competence-matcher/src/config.ts +++ b/src/competence-matcher/src/config.ts @@ -15,12 +15,14 @@ export const config = { multipleDBs: process.env.MULTIPLE_DBS === 'true' || false, ollamaPath: process.env.OLLAMA_PATH || 'http://localhost:11434', ollamaBearerToken: process.env.OLLAMA_BEARER_TOKEN || '', - ollamaBatchSize: parseInt(process.env.OLLAMA_BATCH_SIZE || '5', 10), + ollamaBatchSize: parseInt(process.env.OLLAMA_BATCH_SIZE || '20', 10), splittingModel: process.env.SPLITTING_MODEL || 'llama3.2', splittingLength: parseInt(process.env.SPLITTING_LENGTH || '1000', 10), // Set this to 0 to disable splitting reasonModel: process.env.REASON_MODEL || 'llama3.2', splittingSymbol: process.env.SPLITTING_SYMBOL || 'SPLITTING_SYMBOL', - maxWorkerThreads: parseInt(process.env.NUMBER_OF_THREADS || String(os.cpus().length - 1), 10), // -1 for main thread + maxWorkerThreads: parseInt(process.env.NUMBER_OF_THREADS || String(os.cpus().length - 1), 10), // -1 for main thread (kept for backward compatibility) + embeddingWorkers: parseInt(process.env.EMBEDDING_WORKERS || '1', 10), // Number of embedding workers to keep alive + matchingWorkers: parseInt(process.env.MATCHING_WORKERS || '1', 10), // Number of matching workers to keep alive maxJobTime: parseInt(process.env.MAX_JOB_TIME || '600', 10) * 1_000, // converted from seconds to milliseconds 
verbose: process.env.VERBOSE === 'true' || false, logDir: process.env.LOG_DIR || path.join(process.cwd(), 'logs'), diff --git a/src/competence-matcher/src/middleware/resource.ts b/src/competence-matcher/src/middleware/resource.ts index 59afe7ce4..179e0cf8f 100644 --- a/src/competence-matcher/src/middleware/resource.ts +++ b/src/competence-matcher/src/middleware/resource.ts @@ -175,7 +175,7 @@ export async function handleCreateResourceList( .finally(() => { db.updateJobStatus(jobId!, 'pending'); workerManager.enqueue(job!, 'embedder', { - onExit: (job, code) => onWorkerExit?.(job, code, jobId!), + onExit: (job: any, code: number) => onWorkerExit?.(job, code, jobId!), }); }); diff --git a/src/competence-matcher/src/server.ts b/src/competence-matcher/src/server.ts index 8d8e05c9a..3e19a1dbd 100644 --- a/src/competence-matcher/src/server.ts +++ b/src/competence-matcher/src/server.ts @@ -14,6 +14,7 @@ import { ensureAllHuggingfaceModelsAreAvailable } from './utils/huggingface'; import { EmbeddingTask } from './utils/types'; import { CompetenceMatcherError } from './utils/errors'; import { logError } from './middleware/logging'; +import workerManager from './worker/worker-manager'; const { port: PORT, verbose } = config; @@ -53,20 +54,32 @@ async function main() { if (verbose) { console.log('[Server] All required models are available'); } + + // Wait for worker pools to be ready + if (verbose) { + console.log('[Server] Waiting for worker pools to be ready...'); + } + await workerManager.ready(); + if (verbose) { + console.log('[Server] All worker pools are ready'); + } } catch (error) { const initError = new CompetenceMatcherError( - `Failed to initialise required models: ${error instanceof Error ? error.message : String(error)}`, + `Failed to initialise service: ${error instanceof Error ? error.message : String(error)}`, 'server_initialisation', 503, undefined, { - stage: 'model_initialisation', + stage: + error instanceof Error && error.message.includes('worker') + ? 
'worker_initialisation' + : 'model_initialisation', originalError: error instanceof Error ? error.message : String(error), }, ); logError(initError, 'server_startup_failure'); - console.error('[Server] Failed to start due to model initialisation error'); + console.error('[Server] Failed to start due to initialisation error'); process.exit(1); } diff --git a/src/competence-matcher/src/utils/worker.ts b/src/competence-matcher/src/utils/worker.ts index d7b051825..dc65c7bbf 100644 --- a/src/competence-matcher/src/utils/worker.ts +++ b/src/competence-matcher/src/utils/worker.ts @@ -69,8 +69,8 @@ export async function withJobUpdates( } finally { clearTimeout(maxTimeCheck); db.close(); - parentPort!.close(); - process.exit(exitCode); + // Don't close parentPort or exit process for static worker pools + // Workers need to stay alive to process more jobs } } diff --git a/src/competence-matcher/src/worker/embedder.ts b/src/competence-matcher/src/worker/embedder.ts index e51f65949..aaf8d8dfc 100644 --- a/src/competence-matcher/src/worker/embedder.ts +++ b/src/competence-matcher/src/worker/embedder.ts @@ -1,25 +1,114 @@ -import { parentPort } from 'worker_threads'; +import { parentPort, threadId } from 'worker_threads'; import Embedding from '../tasks/embedding'; import { splitSemantically } from '../tasks/semantic-split'; import { withJobUpdates } from '../utils/worker'; import { config } from '../config'; import { EmbeddingJob } from '../utils/types'; -parentPort!.once('message', async (job: EmbeddingJob) => { +const { verbose } = config; + +/** + * New embedder worker that stays alive and processes jobs sequentially + */ +if (!parentPort) { + throw new Error('This file must be run as a Worker thread'); +} + +// Set up health check handler immediately, before any heavy initialisation +parentPort.on('message', async (message: any) => { + // Handle health checks with highest priority + if (message?.type === 'health_check') { + if (verbose) { + console.log(`[Embedder Worker] 
Thread ${threadId} received health check ${message.checkId}`); + } + parentPort!.postMessage({ + type: 'health_check_response', + checkId: message.checkId, + timestamp: Date.now(), + workerType: 'embedder', + threadId: threadId, + }); + if (verbose) { + console.log(`[Embedder Worker] Thread ${threadId} sent health check response`); + } + return; + } + + // Handle job messages + const job = message as EmbeddingJob; + + // Set global job context for logging (global as any).CURRENT_JOB = job.jobId; - await withJobUpdates(job, async (db, { tasks, jobId }) => { - let work = tasks; - // TODO: This appears to cause the worker to crash silently - // Split tasks semantically - // work = await splitSemantically(tasks); + if (verbose) { + console.log( + `[Embedder Worker] Received and starting job ${job.jobId} with ${job.tasks.length} tasks`, + ); + } + + try { + await withJobUpdates(job, async (db, { tasks, jobId }) => { + let work = tasks; - // For each task: embed & upsert - for (const { listId, resourceId, competenceId, text, type } of work) { - const [vector] = await Embedding.embed(text); - // console.log(`Embedded text for job ${jobId}:`, text, '->', vector); + // TODO: Re-enable semantic splitting once the worker crash issue is resolved + // Split tasks semantically + // work = await splitSemantically(tasks); - db.upsertEmbedding({ listId, resourceId, competenceId, text, type, embedding: vector }); + // Process each embedding task + for (const { listId, resourceId, competenceId, text, type } of work) { + try { + // Generate embedding for the text + const [vector] = await Embedding.embed(text); + + if (verbose) { + console.log(`[Embedder Worker] Generated embedding for ${type} text (job ${jobId})`); + } + + // Store embedding in database + db.upsertEmbedding({ + listId, + resourceId, + competenceId, + text, + type, + embedding: vector, + }); + } catch (error) { + // Log the error but continue with other tasks + parentPort!.postMessage({ + type: 'error', + jobId, + 
error: `Failed to process embedding task: ${error instanceof Error ? error.message : String(error)}`, + }); + } + } + }); + + // Notify that job is completed + parentPort!.postMessage({ + type: 'job_completed', + jobId: job.jobId, + }); + + if (verbose) { + console.log(`[Embedder Worker] Completed job ${job.jobId}`); } - }); + } catch (error) { + // Handle job-level errors + parentPort!.postMessage({ + type: 'error', + jobId: job.jobId, + error: `Job failed: ${error instanceof Error ? error.message : String(error)}`, + }); + + // Still notify completion so the worker can move to next job + parentPort!.postMessage({ + type: 'job_completed', + jobId: job.jobId, + }); + } }); + +if (verbose) { + console.log(`[Embedder Worker] Worker thread ${threadId} ready to process embedding jobs`); +} diff --git a/src/competence-matcher/src/worker/matcher-new.ts b/src/competence-matcher/src/worker/matcher-new.ts new file mode 100644 index 000000000..e69de29bb diff --git a/src/competence-matcher/src/worker/matcher.ts b/src/competence-matcher/src/worker/matcher.ts index 7e2fe7a93..78a487999 100644 --- a/src/competence-matcher/src/worker/matcher.ts +++ b/src/competence-matcher/src/worker/matcher.ts @@ -1,130 +1,209 @@ -import { parentPort } from 'worker_threads'; +import { parentPort, threadId } from 'worker_threads'; import Embedding from '../tasks/embedding'; import { withJobUpdates } from '../utils/worker'; import { addReason } from '../tasks/reason'; import { Match, MatchingJob } from '../utils/types'; import ZeroShot from '../tasks/semantic-zeroshot'; +import { config } from '../config'; -parentPort!.once('message', async (job: MatchingJob) => { - // For workaround: - const matchResults: { [description: string]: any[] } = {}; - for (const task of job.tasks) { - const { taskId, name, description, executionInstructions, requiredCompetencies } = task; - if (!description) { - continue; // Skip tasks without description +const { verbose } = config; + +/** + * New matcher worker that 
stays alive and processes jobs sequentially + */ +if (!parentPort) { + throw new Error('This file must be run as a Worker thread'); +} + +// Set up health check handler immediately, before any heavy initialisation +parentPort.on('message', async (message: any) => { + // Handle health checks with highest priority + if (message?.type === 'health_check') { + if (verbose) { + console.log(`[Matcher Worker] Thread ${threadId} received health check ${message.checkId}`); + } + parentPort!.postMessage({ + type: 'health_check_response', + checkId: message.checkId, + timestamp: Date.now(), + workerType: 'matcher', + threadId: threadId, + }); + if (verbose) { + console.log(`[Matcher Worker] Thread ${threadId} sent health check response`); } - // Add task description to match results - matchResults[description] = []; + return; } - await withJobUpdates( - job, - async (db, { jobId, tasks, listId: listIdFilter, resourceId: resourceIdFilter }) => { - for (const task of tasks) { - const { taskId, name, description, executionInstructions, requiredCompetencies } = task; - if (!description) { - continue; // Skip tasks without description - } - // Embed the task description - const [vector] = await Embedding.embed(description); - - // Search for matches in the competence list (and resource if provided) - let matches: Match[] = db.searchEmbedding(vector, { - filter: { - listId: listIdFilter, - resourceId: resourceIdFilter, // Optional: If matching against a single resource - }, - }); - - // TODO: This appears to cause the worker to not start at all - // Invert potentially contrastive matches - // Add reasoning for matching score - // matches = await addReason(matches, description); - - for (const match of matches) { - // Check for semantic opposites - const zeroshotText = `Task description: ${description}\nSkill/Capability description: ${match.text}`; - // From unsuitable to suitable - const contraLabels = ['contradicting', 'aligning']; - const contraHypothesis = 'Task description and 
Skill/Capability descriptions are {}.'; - const scalingLabls = ['perfect', 'mediocre']; - const scalingHypothesis = - 'Task description and Skill/Capability descriptions are a {} match.'; - const labelScalar = [ - 0.25, // Contradicting matches should be penalised - 0.5, // Scale it down a bit to avoid too high scores for irrelevant matches - 1, // keep the best matches as is - ]; - const contraClassification = await ZeroShot.classify( - zeroshotText, - contraLabels, - contraHypothesis, - ); - let flag: 'contradicting' | 'neutral' | 'aligning' = 'neutral'; - // console.log(contraClassification); - - // @ts-ignore - if (contraClassification.labels[0] === contraLabels[0]) { - // Invert the match distance (since it's normalised to [0,1]: 1 - distance) - match.distance = (1 - match.distance) * labelScalar[0]; - flag = 'contradicting'; - } else { - const scalingClassification = await ZeroShot.classify( - zeroshotText, - scalingLabls, - scalingHypothesis, - ); - - // console.log(scalingClassification); - if ( - // @ts-ignore - scalingClassification.labels[0] === scalingLabls[0] && - // @ts-ignore - scalingClassification.scores[0] > 0.65 - ) { - // Keep the match as is - match.distance *= labelScalar[2]; - flag = 'aligning'; - } - // @ts-ignore - else if (scalingClassification.labels[0] === scalingLabls[1]) { - // Scale it down a bit - match.distance *= labelScalar[1]; - flag = 'neutral'; + // Handle job messages + const job = message as MatchingJob; + + // Set global job context for logging + (global as any).CURRENT_JOB = job.jobId; + + if (verbose) { + console.log( + `[Matcher Worker] Received and starting job ${job.jobId} with ${job.tasks.length} tasks`, + ); + } + + try { + // Store match results for reasoning workaround + const matchResults: { [description: string]: any[] } = {}; + for (const task of job.tasks) { + const { description } = task; + if (!description) { + continue; // Skip tasks without description + } + matchResults[description] = []; + } + + await 
withJobUpdates( + job, + async (db, { jobId, tasks, listId: listIdFilter, resourceId: resourceIdFilter }) => { + for (const task of tasks) { + const { taskId, name, description, executionInstructions, requiredCompetencies } = task; + + if (!description) { + if (verbose) { + console.log(`[Matcher Worker] Skipping task ${taskId} - no description provided`); } + continue; // Skip tasks without description } - // db.addMatchResult({ - // jobId, - // taskId, - // competenceId: match.competenceId, - // text: match.text, - // type: match.type as 'name' | 'description' | 'proficiencyLevel', - // distance: match.distance, - // reason: match.reason, - // }); - // } - - // Workaround to avoid the worker crashing silently - matchResults[description].push({ - jobId, - taskId, - taskText: description, - competenceId: match.competenceId, - resourceId: match.resourceId, - text: match.text, - type: match.type as 'name' | 'description' | 'proficiencyLevel', - alignment: flag, - distance: match.distance, - reason: match.reason, - }); + try { + // Generate embedding for the task description + // Note: This could potentially be optimised by having the embedder worker handle this + // and passing the embedding directly, but for now we keep the same approach + const [vector] = await Embedding.embed(description); + + if (verbose) { + console.log( + `[Matcher Worker] Generated task embedding for job ${jobId}, task ${taskId}`, + ); + } + + // Search for matches in the competence database + let matches: Match[] = db.searchEmbedding(vector, { + filter: { + listId: listIdFilter, + resourceId: resourceIdFilter, // Optional: If matching against a single resource + }, + }); + + if (verbose) { + console.log( + `[Matcher Worker] Found ${matches.length} initial matches for task ${taskId}`, + ); + } + + // TODO: Re-enable reasoning once worker stability issues are resolved + // Apply reasoning to each match to enhance context + // matches = await addReason(description, matches); + + // Zero-shot 
classification for scaling scores based on alignment + const scalingLabels = ['conflicting', 'neutral', 'aligning']; + const labelScalar = [0.8, 1.0, 1.2]; + + // Process each match + for (const match of matches) { + try { + let flag = 'neutral'; // Default flag + + // Apply zero-shot classification + const scalingClassification = await ZeroShot.classify( + `Task: ${description} | Competence: ${match.text}`, + scalingLabels, + ); + + if (scalingClassification) { + // @ts-ignore - ZeroShot classification result structure + if ( + scalingClassification.labels[0] === scalingLabels[2] && + // @ts-ignore + scalingClassification.scores[0] > 0.65 + ) { + // Perfect match - keep as is + match.distance *= labelScalar[2]; + flag = 'aligning'; + } + // @ts-ignore - ZeroShot classification result structure + else if (scalingClassification.labels[0] === scalingLabels[1]) { + // Mediocre match - scale it down + match.distance *= labelScalar[1]; + flag = 'neutral'; + } + } + + // Store match result for reasoning workaround + matchResults[description].push({ + jobId, + taskId, + taskText: description, + competenceId: match.competenceId, + resourceId: match.resourceId, + text: match.text, + type: match.type as 'name' | 'description' | 'proficiencyLevel', + alignment: flag, + distance: match.distance, + reason: match.reason, + }); + } catch (error) { + // Log error for individual match processing but continue + parentPort!.postMessage({ + type: 'error', + jobId, + error: `Failed to process match for task ${taskId}: ${error instanceof Error ? error.message : String(error)}`, + }); + } + } + } catch (error) { + // Log error for task processing but continue with other tasks + parentPort!.postMessage({ + type: 'error', + jobId, + error: `Failed to process task ${taskId}: ${error instanceof Error ? 
error.message : String(error)}`, + }); + } } - } - }, - { - onDone: () => { - parentPort!.postMessage({ type: 'job', job: 'reason', workload: matchResults }); }, - }, - ); + { + // When job processing is done, send results for reasoning + onDone: () => { + parentPort!.postMessage({ + type: 'job', + job: 'reason', + workload: matchResults, + }); + }, + }, + ); + + // Notify that job is completed + parentPort!.postMessage({ + type: 'job_completed', + jobId: job.jobId, + }); + + if (verbose) { + console.log(`[Matcher Worker] Completed job ${job.jobId}`); + } + } catch (error) { + // Handle job-level errors + parentPort!.postMessage({ + type: 'error', + jobId: job.jobId, + error: `Job failed: ${error instanceof Error ? error.message : String(error)}`, + }); + + // Still notify completion so the worker can move to next job + parentPort!.postMessage({ + type: 'job_completed', + jobId: job.jobId, + }); + } }); + +if (verbose) { + console.log(`[Matcher Worker] Worker thread ${threadId} ready to process matching jobs`); +} diff --git a/src/competence-matcher/src/worker/worker-manager.ts b/src/competence-matcher/src/worker/worker-manager.ts index b765dcce0..1ec5b0c31 100644 --- a/src/competence-matcher/src/worker/worker-manager.ts +++ b/src/competence-matcher/src/worker/worker-manager.ts @@ -1,221 +1,639 @@ import { Worker } from 'worker_threads'; import { config } from '../config'; import { createWorker } from '../utils/worker'; -import { splitSemantically } from '../tasks/semantic-split'; -import { Match, WorkerQueue, workerTypes } from '../utils/types'; -import { addReason } from '../tasks/reason'; -import { getDB } from '../utils/db'; +import { EmbeddingJob, MatchingJob, workerTypes, Match } from '../utils/types'; import { WorkerError, DatabaseError, ReasoningError } from '../utils/errors'; import { logError } from '../middleware/logging'; +import { addReason } from '../tasks/reason'; +import { getDB } from '../utils/db'; -const { verbose, maxWorkerThreads } = config; - 
-class WorkerManager { - private concurrency: number; - private queue: WorkerQueue[] = []; - private active: Set = new Set(); +const { verbose, embeddingWorkers, matchingWorkers } = config; + +// Job queue interface for task-specific queues +interface JobQueueItem { + job: EmbeddingJob | MatchingJob; + options?: { + onOnline?: (job: any) => void; + onExit?: (job: any, code: number) => void; + onError?: (job: any, error: Error) => void; + onMessage?: (job: any, message: any) => void; + }; +} - constructor(concurrency: number) { - this.concurrency = concurrency; +// Worker pool for a specific task type +class WorkerPool { + private workers: Worker[] = []; + private queue: JobQueueItem[] = []; + private readonly workerType: workerTypes; + private readonly poolSize: number; + private availableWorkers: Set = new Set(); + private busyWorkers: Map = new Map(); // Maps worker to current jobId + private pendingHealthChecks: Map = new Map(); // Track pending health checks + private beingReplaced: Set = new Set(); // Track workers being replaced to prevent double replacement + + constructor(workerType: workerTypes, poolSize: number) { + this.workerType = workerType; + this.poolSize = poolSize; + this.initialiseWorkers(); } /** - * Enqueue a job for the named worker script + * Initialise static worker pool - workers stay alive and process jobs sequentially */ - public enqueue(job: any, workerScript: workerTypes, options: WorkerQueue['options'] = {}) { - this.queue.push({ job, workerScript, options }); + private initialiseWorkers() { + for (let i = 0; i < this.poolSize; i++) { + this.createAndRegisterWorker(); + } if (verbose) { - console.log( - `[WorkerManager] Enqueued ${workerScript} job ${job.jobId || 'unknown'} (queue: ${this.queue.length}, active: ${this.active.size})`, - ); + console.log(`[WorkerPool] Initialised ${this.poolSize} ${this.workerType} workers`); } - - this.dispatch(); } - /** Try to start as many queued jobs as we have free threads */ - private dispatch() { 
- while (this.active.size < this.concurrency && this.queue.length > 0) { - const { job, workerScript, options } = this.queue.shift()!; - this.startWorker(job, workerScript, options); + /** + * Create a new worker and register it in the pool + */ + private createAndRegisterWorker() { + // Check if we already have enough workers (including those pending health checks) + if (this.workers.length >= this.poolSize) { + if (verbose) { + console.log( + `[WorkerPool] Not creating new ${this.workerType} worker - pool already has ${this.workers.length}/${this.poolSize} workers`, + ); + } + return; } - } - - /** Spawn one worker, hook up its lifecycle, and send the job */ - private startWorker(job: any, workerScript: workerTypes, options: WorkerQueue['options']) { - const jobId = job.jobId || 'unknown'; let worker: Worker; try { - worker = createWorker(workerScript); + worker = createWorker(this.workerType); } catch (error) { const workerError = new WorkerError( - workerScript, - jobId, + this.workerType, + 'unknown', error instanceof Error ? 
error : new Error(String(error)), ); - logError(workerError, 'worker_creation_failure', undefined, { - workerScript, - jobId, - queueLength: this.queue.length, - activeWorkers: this.active.size, + logError(workerError, 'worker_pool_creation_failure', undefined, { + workerType: this.workerType, + poolSize: this.poolSize, }); - - options?.onError?.(job, workerError); return; } - this.active.add(worker); + this.workers.push(worker); + // DON'T add to availableWorkers yet - only after health check passes + + // Perform health check after worker is online + worker.once('online', () => { + this.performHealthCheck(worker); + }); + // Handle worker lifecycle events worker.once('online', () => { if (verbose) { - console.log(`[WorkerManager] Worker for ${workerScript} job ${jobId} started`); + console.log(`[WorkerPool] ${this.workerType} worker ${worker.threadId} is online`); } + }); - try { - worker.postMessage(job); - options?.onOnline?.(job); - } catch (error) { - const messageError = new WorkerError( - workerScript, - jobId, - error instanceof Error ? error : new Error(String(error)), - ); + worker.on('error', (err) => { + const jobId = this.busyWorkers.get(worker) || 'unknown'; + const workerError = new WorkerError( + this.workerType, + jobId, + err instanceof Error ? 
err : new Error(String(err)), + ); - logError(messageError, 'worker_message_send_failure', undefined, { - workerScript, - jobId, - }); + logError(workerError, 'worker_pool_runtime_error', undefined, { + workerType: this.workerType, + threadId: worker.threadId, + jobId, + }); - this.active.delete(worker); - worker.terminate(); - this.dispatch(); - options?.onError?.(job, messageError); - } + // Mark worker as available again and try to process next job + this.markWorkerAvailable(worker); + this.processNextJob(); }); - // When the worker exits (success or failure), remove from active set & dispatch next + // Handle unexpected worker exits - restart the worker worker.once('exit', (code) => { - this.active.delete(worker); + const jobId = this.busyWorkers.get(worker) || 'unknown'; - if (code === 1) { - const exitError = new WorkerError( - workerScript, - jobId, - new Error(`Worker exited with failure code: ${code}`), + if (verbose) { + console.log( + `[WorkerPool] ${this.workerType} worker ${worker.threadId} exited with code ${code}`, ); + } - logError(exitError, 'worker_exit_failure', undefined, { - workerScript, - jobId, - exitCode: code, - }); - } else if (code === 0) { + // Check if this worker is already being replaced to prevent double replacement + if (this.beingReplaced.has(worker)) { if (verbose) { console.log( - `[WorkerManager] Worker for ${workerScript} job ${jobId} completed successfully`, + `[WorkerPool] Worker ${worker.threadId} already being replaced, skipping duplicate replacement`, ); } - } else if (code === 2) { - const timeoutError = new WorkerError( - workerScript, - jobId, - new Error('Worker timed out during execution'), - ); - - logError(timeoutError, 'worker_timeout', undefined, { - workerScript, - jobId, - exitCode: code, - }); - } else { - const unexpectedExitError = new WorkerError( - workerScript, - jobId, - new Error(`Worker exited with unexpected code: ${code}`), - ); - - logError(unexpectedExitError, 'worker_unexpected_exit', undefined, { 
- workerScript, - jobId, - exitCode: code, - }); + this.removeWorkerFromPool(worker); + return; } - this.dispatch(); - options?.onExit?.(job, code); + // Remove from all tracking sets/maps and create replacement + this.removeWorkerFromPool(worker); + this.createAndRegisterWorker(); }); + } - worker.once('error', (err) => { - const workerError = new WorkerError( - workerScript, - jobId, - err instanceof Error ? err : new Error(String(err)), + /** + * Add a job to this pool's queue + */ + public enqueue(job: EmbeddingJob | MatchingJob, options?: JobQueueItem['options']) { + this.queue.push({ job, options }); + + if (verbose) { + console.log( + `[WorkerPool] Enqueued ${this.workerType} job ${job.jobId} (queue: ${this.queue.length}, available: ${this.availableWorkers.size})`, ); + } - logError(workerError, 'worker_runtime_error', undefined, { - workerScript, - jobId, - }); + this.processNextJob(); + } - options?.onError?.(job, workerError); - }); + /** + * Process the next job in queue if workers are available + */ + private processNextJob() { + if (this.queue.length === 0 || this.availableWorkers.size === 0) { + return; + } + + const worker = this.availableWorkers.values().next().value as Worker; + const queueItem = this.queue.shift()!; + + this.assignJobToWorker(worker, queueItem); + } - worker.on('message', async (message) => { + /** + * Assign a specific job to a specific worker + */ + private assignJobToWorker(worker: Worker, { job, options }: JobQueueItem) { + // Mark worker as busy + this.availableWorkers.delete(worker); + this.busyWorkers.set(worker, job.jobId); + + if (verbose) { + console.log( + `[WorkerPool] Assigning ${this.workerType} job ${job.jobId} to worker ${worker.threadId}`, + ); + } + + // Set up message handling for this specific job + const messageHandler = (message: any) => { try { switch (message.type) { case 'status': if (verbose) { console.log( - `[WorkerManager] Worker for job ${message.jobId} status: ${message.status}`, + `[WorkerPool] 
Worker ${worker.threadId} for job ${message.jobId || job.jobId} status: ${message.status}`, ); } break; case 'error': logError( - new WorkerError(workerScript, message.jobId || jobId, new Error(message.error)), + new WorkerError( + this.workerType, + message.jobId || job.jobId, + new Error(message.error), + ), 'worker_reported_error', undefined, - { workerScript, jobId: message.jobId || jobId, reportedError: message.error }, + { + workerType: this.workerType, + threadId: worker.threadId, + jobId: message.jobId || job.jobId, + reportedError: message.error, + }, ); break; case 'log': if (verbose) { console.log( - `[WorkerManager] Worker for job ${message.jobId} log: ${message.message}`, + `[WorkerPool] Worker ${worker.threadId} for job ${message.jobId || job.jobId} log: ${message.message}`, ); } break; - - // Workaround for adding reasoning before saving in DB + case 'job_completed': + // Job is done, mark worker as available and process next job + this.markWorkerAvailable(worker); + this.processNextJob(); + options?.onExit?.(job, 0); + break; case 'job': - switch (message.job) { - case 'reason': - await handleReasoning(job, message); - break; + // Handle special job messages (like reasoning requests from matching workers) + if (message.job === 'reason') { + // Handle reasoning asynchronously without blocking the worker + handleReasoning(job, message).catch((error: any) => { + logError( + error instanceof Error ? error : new Error(String(error)), + 'reasoning_handler_async_failure', + undefined, + { jobId: job.jobId }, + ); + }); } break; } + options?.onMessage?.(job, message); } catch (error) { const messageHandlingError = new WorkerError( - workerScript, - jobId, + this.workerType, + job.jobId, error instanceof Error ? 
error : new Error(String(error)), ); logError(messageHandlingError, 'worker_message_handling_error', undefined, { - workerScript, - jobId, + workerType: this.workerType, + threadId: worker.threadId, + jobId: job.jobId, messageType: message.type, }); } + }; + + worker.on('message', messageHandler); + + // Send the job to the worker + try { + worker.postMessage(job); + options?.onOnline?.(job); + } catch (error) { + const messageError = new WorkerError( + this.workerType, + job.jobId, + error instanceof Error ? error : new Error(String(error)), + ); + + logError(messageError, 'worker_message_send_failure', undefined, { + workerType: this.workerType, + threadId: worker.threadId, + jobId: job.jobId, + }); + + // Remove the message handler and mark worker as available + worker.off('message', messageHandler); + this.markWorkerAvailable(worker); + this.processNextJob(); + options?.onError?.(job, messageError); + } + } + + /** + * Perform health check on a worker to ensure it's responsive + */ + private performHealthCheck(worker: Worker) { + const healthCheckId = `health_check_${Date.now()}_${worker.threadId}`; + const timeout = 20000; // 20 seconds timeout for model loading + + if (verbose) { + console.log( + `[WorkerPool] Performing health check on ${this.workerType} worker ${worker.threadId}`, + ); + } + + // Set up timeout for health check + const healthCheckTimeout = setTimeout(() => { + logError( + new WorkerError(this.workerType, healthCheckId, new Error('Health check timeout')), + 'worker_health_check_timeout', + undefined, + { + workerType: this.workerType, + threadId: worker.threadId, + timeout, + }, + ); + + // Clean up pending health check + this.pendingHealthChecks.delete(worker); + + // Mark worker as being replaced to prevent double replacement + this.beingReplaced.add(worker); + + // Terminate unresponsive worker explicitly + try { + worker.terminate(); + if (verbose) { + console.log( + `[WorkerPool] Terminated unresponsive ${this.workerType} worker 
${worker.threadId}`, + ); + } + } catch (error) { + if (verbose) { + console.log(`[WorkerPool] Failed to terminate worker ${worker.threadId}:`, error); + } + } + + // Remove unresponsive worker and create a replacement + this.removeWorkerFromPool(worker); + this.createAndRegisterWorker(); + }, timeout); + + // Store the timeout so we can clear it if worker responds + this.pendingHealthChecks.set(worker, healthCheckTimeout); + + // Listen for health check response + const healthCheckHandler = (message: any) => { + if (message?.type === 'health_check_response' && message?.checkId === healthCheckId) { + // Clear the timeout since worker responded + const timeout = this.pendingHealthChecks.get(worker); + if (timeout) { + clearTimeout(timeout); + this.pendingHealthChecks.delete(worker); + } + + worker.off('message', healthCheckHandler); + + // NOW mark worker as available since it passed health check + this.availableWorkers.add(worker); + + if (verbose) { + console.log( + `[WorkerPool] ${this.workerType} worker ${worker.threadId} passed health check and is now available`, + ); + } + + // Process any queued jobs now that we have an available worker + this.processNextJob(); + } + }; + + worker.on('message', healthCheckHandler); + + // Send health check request + try { + worker.postMessage({ + type: 'health_check', + checkId: healthCheckId, + timestamp: Date.now(), + }); + } catch (error) { + // Clear the timeout and pending health check + const timeout = this.pendingHealthChecks.get(worker); + if (timeout) { + clearTimeout(timeout); + this.pendingHealthChecks.delete(worker); + } + + worker.off('message', healthCheckHandler); + + logError( + new WorkerError( + this.workerType, + healthCheckId, + error instanceof Error ? 
error : new Error(String(error)), + ), + 'worker_health_check_send_failure', + undefined, + { + workerType: this.workerType, + threadId: worker.threadId, + }, + ); + + // Remove faulty worker and create a replacement + this.removeWorkerFromPool(worker); + this.createAndRegisterWorker(); + } + } + + /** + * Remove a worker from all tracking structures + */ + private removeWorkerFromPool(worker: Worker) { + this.availableWorkers.delete(worker); + this.busyWorkers.delete(worker); + this.beingReplaced.delete(worker); + + // Clean up any pending health check + const timeout = this.pendingHealthChecks.get(worker); + if (timeout) { + clearTimeout(timeout); + this.pendingHealthChecks.delete(worker); + } + + const index = this.workers.indexOf(worker); + if (index > -1) { + this.workers.splice(index, 1); + } + + try { + worker.terminate(); + } catch (error) { + // Ignore termination errors + } + } /** + * Mark a worker as available for new jobs + */ + private markWorkerAvailable(worker: Worker) { + this.busyWorkers.delete(worker); + this.availableWorkers.add(worker); + } + + /** + * Get pool statistics + */ + public getStats() { + return { + workerType: this.workerType, + totalWorkers: this.workers.length, + availableWorkers: this.availableWorkers.size, + busyWorkers: this.busyWorkers.size, + pendingHealthChecks: this.pendingHealthChecks.size, + queuedJobs: this.queue.length, + }; + } + + /** + * Shutdown all workers in this pool + */ + public async shutdown() { + if (verbose) { + console.log(`[WorkerPool] Shutting down ${this.workerType} worker pool`); + } + + const shutdownPromises = this.workers.map((worker) => { + return new Promise((resolve) => { + worker.once('exit', () => resolve()); + worker.terminate(); + }); }); + + await Promise.all(shutdownPromises); + + this.workers.length = 0; + this.availableWorkers.clear(); + this.busyWorkers.clear(); + this.queue.length = 0; + } +} + +/** + * New WorkerManager with static worker pools per task type + */ +class WorkerManager 
{ + private embeddingPool: WorkerPool; + private matchingPool: WorkerPool; + private readyPromise: Promise; + private isReady: boolean = false; + + constructor() { + // Initialise static worker pools + this.embeddingPool = new WorkerPool('embedder', embeddingWorkers); + this.matchingPool = new WorkerPool('matcher', matchingWorkers); + + if (verbose) { + console.log( + `[WorkerManager] Initialised with ${embeddingWorkers} embedding workers and ${matchingWorkers} matching workers`, + ); + } + + // Create ready promise + this.readyPromise = this.waitForWorkersReady(); + } + + /** + * Wait for all worker pools to have at least one available worker + */ + private async waitForWorkersReady(): Promise { + if (verbose) { + console.log('[WorkerManager] Waiting for worker pools to become ready...'); + } + + const maxWaitTime = 30_000; // 30 seconds max wait + const checkInterval = 500; // Check every 500ms + const startTime = Date.now(); + + return new Promise((resolve, reject) => { + const checkReady = () => { + const embeddingStats = this.embeddingPool.getStats(); + const matchingStats = this.matchingPool.getStats(); + + const embeddingReady = embeddingStats.availableWorkers > 0; + const matchingReady = matchingStats.availableWorkers > 0; + + if (embeddingReady && matchingReady) { + this.isReady = true; + if (verbose) { + console.log(`[WorkerManager] All worker pools ready: + - Embedding workers: ${embeddingStats.totalWorkers} total, ${embeddingStats.availableWorkers} available + - Matching workers: ${matchingStats.totalWorkers} total, ${matchingStats.availableWorkers} available`); + } + resolve(); + } else if (Date.now() - startTime > maxWaitTime) { + reject(new Error('Timeout waiting for worker pools to become ready')); + } else { + if (verbose) { + console.log( + `[WorkerManager] Waiting for workers... Embedding: ${embeddingReady ? '✓' : '✗'}, Matching: ${matchingReady ? 
'✓' : '✗'}`, + ); + } + setTimeout(checkReady, checkInterval); + } + }; + + // Start checking after a short delay to allow workers to initialise + setTimeout(checkReady, 1_000); + }); + } + + /** + * Promise that resolves when all worker pools are ready + */ + public ready(): Promise { + return this.readyPromise; + } + + /** + * Check if worker manager is ready (synchronous) + */ + public isWorkerManagerReady(): boolean { + return this.isReady; + } + + /** + * Perform initial health check on all worker pools + * @deprecated Use ready() promise instead + */ + private performInitialHealthCheck() { + if (verbose) { + console.log('[WorkerManager] Performing initial health check on all worker pools'); + } + + const embeddingStats = this.embeddingPool.getStats(); + const matchingStats = this.matchingPool.getStats(); + + console.log(`[WorkerManager] Health check complete: + - Embedding workers: ${embeddingStats.totalWorkers} total, ${embeddingStats.availableWorkers} available, ${embeddingStats.pendingHealthChecks} pending health checks + - Matching workers: ${matchingStats.totalWorkers} total, ${matchingStats.availableWorkers} available, ${matchingStats.pendingHealthChecks} pending health checks`); + + if (embeddingStats.totalWorkers === 0 || matchingStats.totalWorkers === 0) { + console.error('[WorkerManager] WARNING: Some worker pools have no active workers!'); + } + } + + /** + * Enqueue an embedding job + */ + public enqueueEmbedding(job: EmbeddingJob, options?: JobQueueItem['options']) { + this.embeddingPool.enqueue(job, options); + } + + /** + * Enqueue a matching job + */ + public enqueueMatching(job: MatchingJob, options?: JobQueueItem['options']) { + this.matchingPool.enqueue(job, options); + } + + /** + * Generic enqueue method for backward compatibility + * @deprecated Use enqueueEmbedding or enqueueMatching instead + */ + public enqueue(job: any, workerScript: workerTypes, options?: JobQueueItem['options']) { + if (workerScript === 'embedder') { + 
this.enqueueEmbedding(job as EmbeddingJob, options); + } else if (workerScript === 'matcher') { + this.enqueueMatching(job as MatchingJob, options); + } else { + throw new Error(`Unknown worker type: ${workerScript}`); + } + } + + /** + * Get statistics for all worker pools + */ + public getStats() { + return { + embedding: this.embeddingPool.getStats(), + matching: this.matchingPool.getStats(), + }; + } + + /** + * Shutdown all worker pools + */ + public async shutdown() { + if (verbose) { + console.log('[WorkerManager] Shutting down all worker pools'); + } + + await Promise.all([this.embeddingPool.shutdown(), this.matchingPool.shutdown()]); } } +/** + * Handle reasoning requests from matching workers + * This maintains the same functionality as the old system + */ async function handleReasoning(job: any, message: any) { const jobId = job.jobId || 'unknown'; @@ -263,7 +681,7 @@ async function handleReasoning(job: any, message: any) { } } - // Save in DB + // Save results in database let db; try { db = getDB(job.dbName); @@ -293,7 +711,7 @@ async function handleReasoning(job: any, message: any) { } } - // Update job status + // Update job status to completed try { db.updateJobStatus(job.jobId, 'completed'); @@ -334,6 +752,6 @@ async function handleReasoning(job: any, message: any) { } } -// export a singleton instance -const manager = new WorkerManager(maxWorkerThreads); +// Export singleton instance +const manager = new WorkerManager(); export default manager; From 00b663274cef8598cd1cf811d1434765d74a5350 Mon Sep 17 00:00:00 2001 From: Maxi <76120220+MaxiLein@users.noreply.github.com> Date: Thu, 21 Aug 2025 11:26:48 +0200 Subject: [PATCH 16/48] Refactor logging system across competence matcher - Replaced console logging with a centralized logger in model, ollama, worker, and embedder modules. - Introduced structured logging with log levels (DEBUG, INFO, WARN, ERROR) and log types (server, request, worker, etc.). 
- Enhanced worker context management to propagate request IDs and log worker activities. - Removed verbose flag usage and replaced it with appropriate logging levels. - Improved error handling and logging in worker pools and job processing. - Cleaned up deprecated log structures and ensured consistent logging practices throughout the codebase. --- src/competence-matcher/src/config.ts | 14 +- .../src/middleware/error-handler.ts | 21 +- .../src/middleware/logging.ts | 181 ---------- .../src/middleware/match.ts | 71 ++-- .../src/middleware/request-logger.ts | 69 ++++ .../src/middleware/resource.ts | 53 ++- src/competence-matcher/src/server.ts | 52 +-- src/competence-matcher/src/tasks/embedding.ts | 7 +- src/competence-matcher/src/tasks/reason.ts | 72 ++-- .../src/tasks/semantic-split.ts | 63 ++-- .../src/tasks/semantic-zeroshot.ts | 7 +- .../src/utils/huggingface.ts | 24 +- src/competence-matcher/src/utils/logger.ts | 316 +++++++++++++++++ src/competence-matcher/src/utils/model.ts | 6 +- src/competence-matcher/src/utils/ollama.ts | 42 ++- src/competence-matcher/src/utils/types.ts | 61 +++- src/competence-matcher/src/utils/worker.ts | 187 ++++++++++ src/competence-matcher/src/worker/embedder.ts | 79 +++-- .../src/worker/matcher-new.ts | 0 src/competence-matcher/src/worker/matcher.ts | 47 +-- src/competence-matcher/src/worker/test.ts | 9 - .../src/worker/worker-manager.ts | 333 +++++++++++------- 22 files changed, 1135 insertions(+), 579 deletions(-) delete mode 100644 src/competence-matcher/src/middleware/logging.ts create mode 100644 src/competence-matcher/src/middleware/request-logger.ts create mode 100644 src/competence-matcher/src/utils/logger.ts delete mode 100644 src/competence-matcher/src/worker/matcher-new.ts delete mode 100644 src/competence-matcher/src/worker/test.ts diff --git a/src/competence-matcher/src/config.ts b/src/competence-matcher/src/config.ts index b46b08387..9b90a2f0d 100644 --- a/src/competence-matcher/src/config.ts +++ 
b/src/competence-matcher/src/config.ts @@ -2,7 +2,6 @@ import dotenv from 'dotenv'; dotenv.config({ path: '.env' }); import * as os from 'node:os'; -import path from 'node:path'; export const config = { dbPath: process.env.DB_PATH || 'src/db/dbs/', @@ -19,12 +18,17 @@ export const config = { splittingModel: process.env.SPLITTING_MODEL || 'llama3.2', splittingLength: parseInt(process.env.SPLITTING_LENGTH || '1000', 10), // Set this to 0 to disable splitting reasonModel: process.env.REASON_MODEL || 'llama3.2', - splittingSymbol: process.env.SPLITTING_SYMBOL || 'SPLITTING_SYMBOL', + splittingSymbol: process.env.SPLITTING_SYMBOL || '', maxWorkerThreads: parseInt(process.env.NUMBER_OF_THREADS || String(os.cpus().length - 1), 10), // -1 for main thread (kept for backward compatibility) embeddingWorkers: parseInt(process.env.EMBEDDING_WORKERS || '1', 10), // Number of embedding workers to keep alive matchingWorkers: parseInt(process.env.MATCHING_WORKERS || '1', 10), // Number of matching workers to keep alive maxJobTime: parseInt(process.env.MAX_JOB_TIME || '600', 10) * 1_000, // converted from seconds to milliseconds - verbose: process.env.VERBOSE === 'true' || false, - logDir: process.env.LOG_DIR || path.join(process.cwd(), 'logs'), - logFile: process.env.LOG_FILE || path.join(process.cwd(), 'logs', 'competence-matcher.log'), + logLevel: process.env.LOG_LEVEL || 'INFO', // Levels: 'DEBUG', 'INFO', 'WARN', 'ERROR' + logTypes: process.env.LOG_TYPES || 'server,request,worker,database,model,system', + logToConsole: process.env.LOG_CONSOLE !== 'false', // Default to true unless explicitly set to false + logToFile: process.env.LOG_FILE === 'true' || false, // Default to false unless explicitly set to true + logPath: process.env.LOG_PATH || 'logs/', + modelLoadingTimeout: parseInt(process.env.MODEL_LOADING_TIMEOUT || '20', 10), // Timeout for model loading in seconds + maxWorkerRetries: parseInt(process.env.MAX_WORKER_RETRIES || '3', 10), // Maximum worker restart attempts 
before escalating to ERROR + workerRetryWindow: parseInt(process.env.WORKER_RETRY_WINDOW || '300', 10) * 1_000, // Time window in seconds to reset retry count (converted to ms) }; diff --git a/src/competence-matcher/src/middleware/error-handler.ts b/src/competence-matcher/src/middleware/error-handler.ts index 026c5fe09..716de784c 100644 --- a/src/competence-matcher/src/middleware/error-handler.ts +++ b/src/competence-matcher/src/middleware/error-handler.ts @@ -1,10 +1,11 @@ import { Request, Response, NextFunction } from 'express'; import { CompetenceMatcherError } from '../utils/errors'; -import { logError } from './logging'; +import { getLogger } from '../utils/logger'; + +const logger = getLogger(); /** - * Central error handler middleware - * Catches all errors and provides consistent error responses + * Enhanced error handler middleware using the new logging system */ export function errorHandler( error: Error | CompetenceMatcherError, @@ -12,34 +13,36 @@ export function errorHandler( res: Response, next: NextFunction, ): void { - const requestId = (req as any).requestId || 'unknown'; + const requestId = req.requestId || 'unknown'; if (error instanceof CompetenceMatcherError) { // Handle our custom errors - logError(error, error.context, requestId, { + logger.error('request', `${error.context}: ${error.message}`, error, { statusCode: error.statusCode, details: error.details, path: req.path, method: req.method, - }); + requestId, + }, requestId); res.status(error.statusCode).json({ error: { message: error.message, context: error.context, - requestId: error.requestId || requestId, + requestId: requestId, ...(error.details && { details: error.details }), }, }); } else { // Handle unexpected errors - logError(error, 'unhandled_error', requestId, { + logger.error('system', 'Unhandled error occurred', error, { path: req.path, method: req.method, body: req.body, query: req.query, params: req.params, - }); + requestId, + }, requestId); res.status(500).json({ error: { 
diff --git a/src/competence-matcher/src/middleware/logging.ts b/src/competence-matcher/src/middleware/logging.ts deleted file mode 100644 index 2f7d2ee0e..000000000 --- a/src/competence-matcher/src/middleware/logging.ts +++ /dev/null @@ -1,181 +0,0 @@ -import { Request, Response, NextFunction } from 'express'; -import * as fs from 'node:fs'; -import * as path from 'node:path'; - -import { config } from '../config'; -import { randomUUID } from 'node:crypto'; -import { LogEntry } from '../utils/types'; - -const { verbose, logDir, logFile } = config; - -const today = new Date().toISOString().split('T')[0]; // Format: YYYY-MM-DD -const logFilePath = `${logFile.replace('.log', '')}-${today}.log`; - -// Create logs directory if it doesn't exist -if (!fs.existsSync(logDir)) { - fs.mkdirSync(logDir, { recursive: true }); -} - -// Generate unique request ID -function generateRequestId(): string { - return `req_${Date.now()}_${randomUUID()}`; -} - -// Write log entry to file -function writeLogToFile(logEntry: LogEntry): void { - const logLine = JSON.stringify(logEntry) + '\n'; - fs.appendFileSync(logFilePath, logLine, 'utf8'); -} - -// Log error with context -export function logError( - error: Error | string, - context: string, - requestId?: string, - additionalData?: any, -): void { - const errorMessage = error instanceof Error ? error.message : String(error); - const errorStack = error instanceof Error ? 
error.stack : undefined; - - const logEntry: LogEntry = { - timestamp: new Date().toISOString(), - requestId: requestId || 'system', - type: 'error', - error: errorMessage, - errorStack, - context, - ...additionalData, - }; - - // Console logging if verbose - if (verbose) { - console.error(`[${context}] Error:`, errorMessage); - if (errorStack && verbose) { - console.error('Stack trace:', errorStack); - } - if (additionalData) { - console.error('Additional data:', additionalData); - } - } - - // Write to log file - writeLogToFile(logEntry); -} - -// Enhanced request logger middleware -export function requestLogger(req: Request, res: Response, next: NextFunction): void { - const requestId = generateRequestId(); - const startTime = Date.now(); - - // Add requestId to request for later use - (req as any).requestId = requestId; - - const requestLogEntry: LogEntry = { - timestamp: new Date().toISOString(), - requestId, - type: 'request', - method: req.method, - path: req.path, - query: req.query, - body: req.body, - headers: req.headers, - params: req.params, - ip: req.ip, - realIp: req.headers['x-real-ip'] || req.headers['x-forwarded-for'] || req.ip, - }; - - // Console logging if verbose - if (verbose) { - console.table([ - { - timestamp: requestLogEntry.timestamp, - requestId: requestLogEntry.requestId, - type: requestLogEntry.type, - method: requestLogEntry.method, - path: requestLogEntry.path, - query: - typeof requestLogEntry.query === 'object' - ? JSON.stringify(requestLogEntry.query, null, 2) - : requestLogEntry.query, - headers: - typeof requestLogEntry.headers === 'object' - ? JSON.stringify(requestLogEntry.headers, null, 2) - : requestLogEntry.headers, - params: - typeof requestLogEntry.params === 'object' - ? JSON.stringify(requestLogEntry.params, null, 2) - : requestLogEntry.params, - ip: requestLogEntry.ip, - realIp: - typeof requestLogEntry.realIp === 'object' - ? 
JSON.stringify(requestLogEntry.realIp, null, 2) - : requestLogEntry.realIp, - }, - ]); - } - - // Write request to log file - writeLogToFile(requestLogEntry); - - // Override res.json to capture response - this is not needed as we will override res.send which appears to be used by res.json internally - // This is commented out to avoid double logging - // const originalJson = res.json; - // res.json = function (obj) { - // const responseTime = Date.now() - startTime; - - // const responseLogEntry: LogEntry = { - // timestamp: new Date().toISOString(), - // requestId, - // type: 'response', - // statusCode: res.statusCode, - // responseTime, - // body: obj, - // }; - - // // Console logging if verbose - // if (verbose) { - // console.table([responseLogEntry]); - // } - - // // Write response to log file - // writeLogToFile(responseLogEntry); - - // return originalJson.call(this, obj); - // }; - - // Override res.send to capture response - const originalSend = res.send; - res.send = function (body) { - const responseTime = Date.now() - startTime; - - const responseLogEntry: LogEntry = { - timestamp: new Date().toISOString(), - requestId, - type: 'response', - statusCode: res.statusCode, - responseTime, - body: body, - }; - - // Console logging if verbose - if (verbose) { - console.table([ - { - timestamp: responseLogEntry.timestamp, - requestId: responseLogEntry.requestId, - type: responseLogEntry.type, - statusCode: responseLogEntry.statusCode, - responseTime: responseLogEntry.responseTime, - // body: responseLogEntry.body, - }, - ]); - } - - // Write response to log file - writeLogToFile(responseLogEntry); - - return originalSend.call(this, body); - }; - - next(); -} diff --git a/src/competence-matcher/src/middleware/match.ts b/src/competence-matcher/src/middleware/match.ts index cebf3e3ab..32f2651da 100644 --- a/src/competence-matcher/src/middleware/match.ts +++ b/src/competence-matcher/src/middleware/match.ts @@ -18,7 +18,9 @@ import { DatabaseError, 
CompetenceMatcherError, } from '../utils/errors'; -import { logError } from './logging'; +import { getLogger } from '../utils/logger'; + +const logger = getLogger(); export function matchCompetenceList(req: Request, res: Response, next: NextFunction): void { const requestId = (req as any).requestId; @@ -211,13 +213,15 @@ export function matchCompetenceList(req: Request, res: Response, next: NextFunct try { db.updateJobStatus(matchingJobId, 'failed'); } catch (error) { - logError( - new DatabaseError( - 'updateJobStatus', - error instanceof Error ? error : new Error(String(error)), - requestId, - ), - 'inline_job_failure_update', + const dbError = new DatabaseError( + 'updateJobStatus', + error instanceof Error ? error : new Error(String(error)), + requestId, + ); + logger.databaseError( + 'Failed to update job status to failed', + dbError, + { matchingJobId }, requestId, ); } @@ -227,13 +231,15 @@ export function matchCompetenceList(req: Request, res: Response, next: NextFunct try { db.updateJobStatus(matchingJobId, 'pending'); } catch (error) { - logError( - new DatabaseError( - 'updateJobStatus', - error instanceof Error ? error : new Error(String(error)), - requestId, - ), - 'inline_job_pending_update', + const dbError = new DatabaseError( + 'updateJobStatus', + error instanceof Error ? error : new Error(String(error)), + requestId, + ); + logger.databaseError( + 'Failed to update job status to pending', + dbError, + { matchingJobId }, requestId, ); return; @@ -278,26 +284,31 @@ export function matchCompetenceList(req: Request, res: Response, next: NextFunct try { db.updateJobStatus(matchingJobId, 'failed'); } catch (dbError) { - logError( - new DatabaseError( - 'updateJobStatus', - dbError instanceof Error ? dbError : new Error(String(dbError)), - requestId, - ), - 'inline_job_error_update', + const dbErrorObj = new DatabaseError( + 'updateJobStatus', + dbError instanceof Error ? 
dbError : new Error(String(dbError)), + requestId, + ); + logger.databaseError( + 'Failed to update job status after error', + dbErrorObj, + { matchingJobId }, requestId, ); } - logError( - new CompetenceMatcherError( - `Failed to create inline matching job: ${error instanceof Error ? error.message : String(error)}`, - 'inline_job_creation', - 500, - requestId, - { matchingJobId }, - ), + const matchingError = new CompetenceMatcherError( + `Failed to create inline matching job: ${error instanceof Error ? error.message : String(error)}`, 'inline_job_creation', + 500, + requestId, + { matchingJobId }, + ); + logger.error( + 'request', + 'Failed to create inline matching job', + matchingError, + { matchingJobId }, requestId, ); } diff --git a/src/competence-matcher/src/middleware/request-logger.ts b/src/competence-matcher/src/middleware/request-logger.ts new file mode 100644 index 000000000..923bd9d3e --- /dev/null +++ b/src/competence-matcher/src/middleware/request-logger.ts @@ -0,0 +1,69 @@ +import { Request, Response, NextFunction } from 'express'; +import { getLogger } from '../utils/logger'; + +const logger = getLogger(); + +// Extend Request interface to include requestId +declare global { + namespace Express { + interface Request { + requestId: string; + startTime: number; + } + } +} + +/** + * Request logger middleware + */ +export function requestLogger(req: Request, res: Response, next: NextFunction): void { + const requestId = logger.generateRequestId(); + const startTime = Date.now(); + + // Add requestId and startTime to request for later use + req.requestId = requestId; + req.startTime = startTime; + + // Log incoming request + logger.debug( + 'request', + `Incoming ${req.method} ${req.path}`, + { + query: req.query, + params: req.params, + ip: req.ip, + userAgent: req.headers['user-agent'], + contentType: req.headers['content-type'], + }, + requestId, + ); + + // Override res.send to capture response + // res.json seems to use .send internally, so we 
can override send + const originalSend = res.send; + res.send = function (body) { + const responseTime = Date.now() - startTime; + + // Log the request completion + logger.request(req.method, req.path, res.statusCode, responseTime, requestId, { + contentLength: res.get('content-length'), + responseSize: body ? Buffer.byteLength(body, 'utf8') : 0, + }); + + return originalSend.call(this, body); + }; + + next(); +} + +/** + * Middleware to extract requestId from existing requests for backward compatibility + */ +export function requestIdExtractor(req: Request, res: Response, next: NextFunction): void { + // If requestId is not already set, generate one + if (!req.requestId) { + req.requestId = logger.generateRequestId(); + req.startTime = Date.now(); + } + next(); +} diff --git a/src/competence-matcher/src/middleware/resource.ts b/src/competence-matcher/src/middleware/resource.ts index 179e0cf8f..0f3090fec 100644 --- a/src/competence-matcher/src/middleware/resource.ts +++ b/src/competence-matcher/src/middleware/resource.ts @@ -10,7 +10,9 @@ import { DatabaseError, CompetenceMatcherError, } from '../utils/errors'; -import { logError } from './logging'; +import { getLogger } from '../utils/logger'; + +const logger = getLogger(); export function getResourceLists(req: Request, res: Response, next: NextFunction): void { const requestId = (req as any).requestId; @@ -87,6 +89,7 @@ export function getResourceList(req: Request, res: Response, next: NextFunction) export async function handleCreateResourceList( dbName: string, resources: ResourceInput[], + requestId?: string, onWorkerExit?: (job: any, code: number, jobId: string) => void, ): Promise<{ jobId: string; status: string }> { let resourceIds: string[] = []; @@ -165,7 +168,15 @@ export async function handleCreateResourceList( }; }) .catch((err) => { - console.error('Error splitting semantically:', err); + logger.warn( + 'system', + 'Error during semantic splitting, falling back to original tasks', + { + error: err 
instanceof Error ? err.message : String(err), + taskCount: descriptionEmbeddingInput.length, + }, + requestId, + ); job = { jobId: jobId!, dbName: dbName, @@ -183,24 +194,54 @@ export async function handleCreateResourceList( } export function createResourceList(req: Request, res: Response, next: NextFunction): void { + const requestId = req.requestId; + if (!Array.isArray(req.body) || req.body.length === 0) { res.status(400).json({ error: 'Invalid request body. Expected an array of resources.' }); return; } try { - handleCreateResourceList(req.dbName!, req.body) + handleCreateResourceList(req.dbName!, req.body, requestId) .then(({ jobId, status }) => { + logger.debug( + 'request', + 'Resource list creation job created', + { + jobId, + status, + resourceCount: req.body.length, + }, + requestId, + ); + res .setHeader('Location', `${PATHS.resource}/jobs/${jobId}`) .status(202) .json({ jobId, status }); }) .catch((error) => { - console.error('Error adding resource list:', error); + logger.error( + 'request', + 'Error adding resource list', + error instanceof Error ? error : new Error(String(error)), + { + resourceCount: req.body.length, + }, + requestId, + ); res.status(400).json({ error: error.message || 'Invalid request body format' }); }); } catch (error) { - console.error('Error processing request body:', error); + logger.error( + 'request', + 'Error processing request body', + error instanceof Error ? error : new Error(String(error)), + { + bodyType: typeof req.body, + bodyLength: Array.isArray(req.body) ? 
req.body.length : 'not array', + }, + requestId, + ); res.status(400).json({ error: 'Invalid request body format' }); } } @@ -230,7 +271,7 @@ export function getJobStatus(req: Request, res: Response) { return; } } catch (err) { - // console.error(err); + // logger.error("system", err); res.status(404).json({ error: 'Job not found' }); } } diff --git a/src/competence-matcher/src/server.ts b/src/competence-matcher/src/server.ts index 3e19a1dbd..1451fb25d 100644 --- a/src/competence-matcher/src/server.ts +++ b/src/competence-matcher/src/server.ts @@ -1,22 +1,23 @@ import express from 'express'; +import { config } from './config'; +import { Logger, createLoggerConfig } from './utils/logger'; + +// Initilise logger first, before any other imports that might use it +const loggerConfig = createLoggerConfig(); +const logger = Logger.getInstance(loggerConfig); + import ResourceRouter from './routes/resource'; import MatchRouter from './routes/match'; -import { config } from './config'; import { dbHeader } from './middleware/db-locator'; -import { requestLogger } from './middleware/logging'; +import { requestLogger } from './middleware/request-logger'; import { errorHandler } from './middleware/error-handler'; -import Embedding from './tasks/embedding'; import { ensureAllOllamaModelsAreAvailable } from './utils/ollama'; -import { splitSemantically } from './tasks/semantic-split'; -import { createWorker } from './utils/worker'; import { ensureAllHuggingfaceModelsAreAvailable } from './utils/huggingface'; -import { EmbeddingTask } from './utils/types'; import { CompetenceMatcherError } from './utils/errors'; -import { logError } from './middleware/logging'; import workerManager from './worker/worker-manager'; -const { port: PORT, verbose } = config; +const { port: PORT } = config; export const PATHS = { resource: '/resource-competence-list', @@ -34,35 +35,23 @@ async function main() { const app = express(); try { - if (verbose) { - console.log('[Server] Initialising competence 
matcher service...'); - } + logger.info('server', 'Initialising competence matcher service...'); // Ensure all required models are available // Hugging Face models - if (verbose) { - console.log('[Server] Checking HuggingFace models availability...'); - } + logger.info('server', 'Checking HuggingFace models availability...'); await ensureAllHuggingfaceModelsAreAvailable(); // Ollama models - if (verbose) { - console.log('[Server] Checking Ollama models availability...'); - } + logger.info('server', 'Checking Ollama models availability...'); await ensureAllOllamaModelsAreAvailable(); - if (verbose) { - console.log('[Server] All required models are available'); - } + logger.info('server', 'All required models are available'); // Wait for worker pools to be ready - if (verbose) { - console.log('[Server] Waiting for worker pools to be ready...'); - } + logger.info('server', 'Waiting for worker pools to be ready...'); await workerManager.ready(); - if (verbose) { - console.log('[Server] All worker pools are ready'); - } + logger.info('server', 'All worker pools are ready'); } catch (error) { const initError = new CompetenceMatcherError( `Failed to initialise service: ${error instanceof Error ? error.message : String(error)}`, @@ -78,8 +67,7 @@ async function main() { }, ); - logError(initError, 'server_startup_failure'); - console.error('[Server] Failed to start due to initialisation error'); + logger.error('server', 'Failed to start due to initialisation error', initError); process.exit(1); } @@ -105,9 +93,7 @@ async function main() { app.use(errorHandler); app.listen(PORT, () => { - if (verbose) { - console.log(`[Server] Matching-Server is running on http://localhost:${PORT}`); - } + logger.info('server', `Matching-Server is running on http://localhost:${PORT}`); }); } @@ -120,7 +106,7 @@ main().catch((error) => { { originalError: error instanceof Error ? 
error.message : String(error) }, ); - logError(startupError, 'server_startup_failure'); - console.error('[Server] Server shutdown due to startup error:', error); + logger.error('server', 'Server startup failed', startupError); + logger.error('system', '[Server] Server shutdown due to startup error:', error); process.exit(1); }); diff --git a/src/competence-matcher/src/tasks/embedding.ts b/src/competence-matcher/src/tasks/embedding.ts index 0f320214c..39356cfac 100644 --- a/src/competence-matcher/src/tasks/embedding.ts +++ b/src/competence-matcher/src/tasks/embedding.ts @@ -6,6 +6,11 @@ import { import { config } from '../config'; import { TransformerPipeline } from '../utils/model'; import { TransformerPipelineOptions } from '../utils/types'; +import { getLogger } from '../utils/logger'; + +function getLoggerInstance() { + return getLogger(); +} export default class Embedding extends TransformerPipeline { protected static override getPipelineOptions(): TransformerPipelineOptions { @@ -14,7 +19,7 @@ export default class Embedding extends TransformerPipeline { - // console.log(`Embedding progress: ${progress}`); + // logger.debug("system", `Embedding progress: ${progress}`); // }, }, }; diff --git a/src/competence-matcher/src/tasks/reason.ts b/src/competence-matcher/src/tasks/reason.ts index ff1cba017..45e5f3cea 100644 --- a/src/competence-matcher/src/tasks/reason.ts +++ b/src/competence-matcher/src/tasks/reason.ts @@ -4,20 +4,25 @@ import { MATCH_REASON as intructPrompt } from '../utils/prompts'; import type { Message } from 'ollama'; import { Match } from '../utils/types'; import { ReasoningError, OllamaConnectionError } from '../utils/errors'; -import { logError } from '../middleware/logging'; +import { getLogger } from '../utils/logger'; -const { reasonModel, verbose } = config; +const { reasonModel } = config; + +function getLoggerInstance() { + return getLogger(); +} export async function addReason(matches: T[], targetText: string): Promise { if (matches.length 
=== 0) { return matches; // No matches to reason about } - if (verbose) { - console.log( - `[Reasoning] Adding reasons for ${matches.length} matches using model: ${reasonModel}`, - ); - } + const logger = getLoggerInstance(); + + logger.debug('model', `Adding reasoning to ${matches.length} matches`, { + targetTextLength: targetText.length, + reasonModel, + }); const reasonMatches: T[] = await Promise.all( matches.map(async (match, index) => { @@ -38,9 +43,14 @@ export async function addReason(matches: T[], targetText: strin // Extract the reason from the response const reason = response.message.content.trim(); - if (verbose) { - console.log(`[Reasoning] Generated reason for match ${index + 1}/${matches.length}`); - } + getLoggerInstance().debug( + 'model', + `Generated reasoning for match ${index + 1}/${matches.length}`, + { + matchText: match.text.substring(0, 50) + (match.text.length > 50 ? '...' : ''), + reasonLength: reason.length, + }, + ); return { ...match, @@ -52,14 +62,19 @@ export async function addReason(matches: T[], targetText: strin error instanceof Error ? error : new Error(String(error)), ); - logError(reasoningError, 'reasoning_single_match_failure', undefined, { - matchIndex: index, - totalMatches: matches.length, - targetTextLength: targetText.length, - matchText: match.text.substring(0, 100) + (match.text.length > 100 ? '...' : ''), - similarity: match.distance, - reasonModel, - }); + getLoggerInstance().error( + 'model', + 'Failed to generate reasoning for match', + reasoningError, + { + matchIndex: index, + totalMatches: matches.length, + targetTextLength: targetText.length, + matchText: match.text.substring(0, 100) + (match.text.length > 100 ? '...' 
: ''), + similarity: match.distance, + reasonModel, + }, + ); // If there's an error, just keep the original match without a reason return match; @@ -67,14 +82,19 @@ export async function addReason(matches: T[], targetText: strin }), ); - if (verbose) { - const successfulReasons = reasonMatches.filter( - (match) => 'reason' in match && match.reason, - ).length; - console.log( - `[Reasoning] Completed: ${successfulReasons}/${matches.length} matches received reasons`, - ); - } + const successfulReasons = reasonMatches.filter( + (match) => 'reason' in match && match.reason, + ).length; + + getLoggerInstance().debug( + 'model', + `Reasoning completed: ${successfulReasons}/${matches.length} matches received reasons`, + { + successfulReasons, + totalMatches: matches.length, + reasonModel, + }, + ); return reasonMatches; } diff --git a/src/competence-matcher/src/tasks/semantic-split.ts b/src/competence-matcher/src/tasks/semantic-split.ts index e05677a26..a8e13626e 100644 --- a/src/competence-matcher/src/tasks/semantic-split.ts +++ b/src/competence-matcher/src/tasks/semantic-split.ts @@ -4,23 +4,28 @@ import { SEMANTIC_SPLITTER as intructPrompt } from '../utils/prompts'; import type { Message } from 'ollama'; import { EmbeddingTask } from '../utils/types'; import { SemanticSplittingError, OllamaConnectionError } from '../utils/errors'; -import { logError } from '../middleware/logging'; +import { getLogger } from '../utils/logger'; const { splittingModel, splittingSymbol, ollamaBatchSize, splittingLength: MIN_TEXT_LENGTH, - verbose, } = config; +function getLoggerInstance() { + return getLogger(); +} + export async function splitSemantically(tasks: EmbeddingTask[]): Promise { + const logger = getLoggerInstance(); const splittedTasks: EmbeddingTask[] = []; const toSplit: { task: EmbeddingTask; messages: Message[] }[] = []; - if (verbose) { - console.log(`[Semantic Split] Processing ${tasks.length} tasks`); - } + logger.debug('system', `Processing ${tasks.length} tasks for 
semantic splitting`, { + taskCount: tasks.length, + minTextLength: MIN_TEXT_LENGTH, + }); for (const task of tasks) { const messages: Message[] = [ @@ -50,19 +55,16 @@ export async function splitSemantically(tasks: EmbeddingTask[]): Promise { try { @@ -77,18 +79,16 @@ export async function splitSemantically(tasks: EmbeddingTask[]): Promise part !== ''); if (parts.length === 0) { - if (verbose) { - console.warn( - `[Semantic Split] No valid parts found for task ${task.listId}/${task.resourceId}/${task.competenceId}, using original text`, - ); - } + logger.warn( + 'system', + `[Semantic Split] No valid parts found for task ${task.listId}/${task.resourceId}/${task.competenceId}, using original text`, + ); splittedTasks.push({ ...task, text: task.text }); } else { - if (verbose) { - console.log( - `[Semantic Split] Split task ${task.listId}/${task.resourceId}/${task.competenceId} into ${parts.length} parts`, - ); - } + logger.debug( + 'system', + `[Semantic Split] Split task ${task.listId}/${task.resourceId}/${task.competenceId} into ${parts.length} parts`, + ); for (const part of parts) { splittedTasks.push({ ...task, text: part }); } @@ -99,7 +99,7 @@ export async function splitSemantically(tasks: EmbeddingTask[]): Promise { protected static override getPipelineOptions(): TransformerPipelineOptions { @@ -14,7 +19,7 @@ export default class ZeroShot extends TransformerPipeline { - // console.log(`Embedding progress: ${progress}`); + // logger.debug("system", `Embedding progress: ${progress}`); // }, model_file_name: 'model.onnx', use_external_data_format: true, diff --git a/src/competence-matcher/src/utils/huggingface.ts b/src/competence-matcher/src/utils/huggingface.ts index 401597745..0f76908dd 100644 --- a/src/competence-matcher/src/utils/huggingface.ts +++ b/src/competence-matcher/src/utils/huggingface.ts @@ -1,29 +1,21 @@ import Embedding from '../tasks/embedding'; import ZeroShotSemanticOpposites from '../tasks/semantic-zeroshot'; import { HuggingFaceModelError } 
from './errors'; -import { config } from '../config'; +import { getLogger } from './logger'; -const { verbose } = config; +const logger = getLogger(); export async function ensureAllHuggingfaceModelsAreAvailable() { - if (verbose) { - console.log('[HuggingFace] Checking availability of required models...'); - } + logger.debug('model', 'Checking availability of required models...'); try { - if (verbose) { - console.log('[HuggingFace] Initialising embedding model...'); - } + logger.debug('model', 'Initialising embedding model...'); await Embedding.getInstance(); - if (verbose) { - console.log('[HuggingFace] Initialising zero-shot semantic opposites model...'); - } + logger.debug('model', 'Initialising zero-shot semantic opposites model...'); await ZeroShotSemanticOpposites.getInstance(); - if (verbose) { - console.log('[HuggingFace] All models initialised successfully'); - } + logger.modelInfo('All HuggingFace models initialised successfully'); } catch (error) { throw new HuggingFaceModelError( 'unknown', // We don't know which specific model failed - will maybe add later @@ -31,7 +23,5 @@ export async function ensureAllHuggingfaceModelsAreAvailable() { error instanceof Error ? 
error : new Error(String(error)), ); } - if (verbose) { - console.log('[HuggingFace] All required HuggingFace-Models are available'); - } + logger.modelInfo('All required HuggingFace models are available'); } diff --git a/src/competence-matcher/src/utils/logger.ts b/src/competence-matcher/src/utils/logger.ts new file mode 100644 index 000000000..24d90e732 --- /dev/null +++ b/src/competence-matcher/src/utils/logger.ts @@ -0,0 +1,316 @@ +import * as fs from 'node:fs'; +import * as path from 'node:path'; +import { randomUUID } from 'node:crypto'; +import { config } from '../config'; +import { LogLevel, LogType, LogEntry, LoggerConfig } from './types'; + +// Re-export types for convenience +export { LogLevel, LogType, LogEntry, LoggerConfig } from './types'; + +export function createLoggerConfig(): LoggerConfig { + // Parse enabled log types from config + const enabledTypes = config.logTypes + .split(',') + .map((type) => type.trim()) + .filter((type) => + ['server', 'request', 'worker', 'database', 'model', 'system'].includes(type), + ) as LogType[]; + + // Convert string log level to enum + const logLevelMap: { [key: string]: LogLevel } = { + DEBUG: LogLevel.DEBUG, + INFO: LogLevel.INFO, + WARN: LogLevel.WARN, + ERROR: LogLevel.ERROR, + }; + + const mappedLevel = logLevelMap[config.logLevel.toUpperCase()]; + + return { + level: mappedLevel !== undefined ? 
mappedLevel : LogLevel.INFO, + enabledTypes, + enableConsole: config.logToConsole, + enableFile: config.logToFile, + logDir: config.logPath, + colorize: true, + }; +} + +// ANSI color codes for console output +const colors = { + DEBUG: '\x1b[36m', // Cyan + INFO: '\x1b[32m', // Green + WARN: '\x1b[33m', // Yellow + ERROR: '\x1b[31m', // Red + reset: '\x1b[0m', // Reset + dim: '\x1b[2m', // Dim + bold: '\x1b[1m', // Bold + gray: '\x1b[90m', // Gray + red: '\x1b[31m', // Red +}; + +// Type colors for better visual distinction +const typeColors = { + server: '\x1b[35m', // Magenta + request: '\x1b[34m', // Blue + worker: '\x1b[36m', // Cyan + database: '\x1b[33m', // Yellow + model: '\x1b[32m', // Green + system: '\x1b[37m', // White +}; + +export class Logger { + private static instance: Logger; + private config: LoggerConfig; + private logStreams: Map = new Map(); + + private constructor(config: LoggerConfig) { + this.config = config; + this.initialiseLogStreams(); + } + + public static getInstance(config?: LoggerConfig): Logger { + if (!Logger.instance) { + if (!config) { + throw new Error('Logger must be initialised with config on first call'); + } + Logger.instance = new Logger(config); + } + return Logger.instance; + } + + private initialiseLogStreams(): void { + if (!this.config.enableFile) return; + + // Ensure log directory exists + if (!fs.existsSync(this.config.logDir)) { + fs.mkdirSync(this.config.logDir, { recursive: true }); + } + + const today = new Date().toISOString().split('T')[0]; + + // Main log file (all logs) + const mainLogPath = path.join(this.config.logDir, `competence-matcher-${today}.json`); + this.logStreams.set('main', fs.createWriteStream(mainLogPath, { flags: 'a' })); + + // Error-only log file + const errorLogPath = path.join(this.config.logDir, `errors-${today}.log`); + this.logStreams.set('error', fs.createWriteStream(errorLogPath, { flags: 'a' })); + + // Request-only log file + const requestLogPath = path.join(this.config.logDir, 
`requests-${today}.log`); + this.logStreams.set('request', fs.createWriteStream(requestLogPath, { flags: 'a' })); + } + + private shouldLog(level: LogLevel, type: LogType): boolean { + return level >= this.config.level && this.config.enabledTypes.includes(type); + } + + private formatTimestamp(): string { + return new Date().toISOString().replace('T', ' ').replace('Z', ''); + } + + private formatConsoleMessage(entry: LogEntry): string { + if (!this.config.colorize) { + return this.formatPlainMessage(entry); + } + + const levelColor = colors[LogLevel[entry.level] as keyof typeof colors]; + const typeColor = typeColors[entry.type]; + const timestamp = `${colors.gray}[${entry.timestamp}]${colors.reset}`; + const level = `${levelColor}${entry.levelName.padEnd(5)}${colors.reset}`; + const type = `${typeColor}[${entry.type}${entry.requestId ? `:${String(entry.requestId).slice(0, 8)}` : ''}]${colors.reset}`; + const message = entry.message; + + let output = `${timestamp} ${level} ${type} ${message}`; + + // Add data if present + if (entry.data) { + const dataStr = + typeof entry.data === 'object' ? JSON.stringify(entry.data, null, 2) : String(entry.data); + output += `\n${colors.dim}${dataStr}${colors.reset}`; + } + + // Add error details if present + if (entry.error) { + output += `\n${colors.red}Error: ${entry.error.message}${colors.reset}`; + if (entry.error.stack) { + output += `\n${colors.dim}${entry.error.stack}${colors.reset}`; + } + } + + return output; + } + + private formatPlainMessage(entry: LogEntry): string { + const timestamp = `[${entry.timestamp}]`; + const level = entry.levelName.padEnd(5); + const type = `[${entry.type}${entry.requestId ? `:${String(entry.requestId).slice(0, 8)}` : ''}]`; + + let output = `${timestamp} ${level} ${type} ${entry.message}`; + + if (entry.data) { + const dataStr = + typeof entry.data === 'object' ? 
JSON.stringify(entry.data) : String(entry.data); + output += ` | ${dataStr}`; + } + + if (entry.error) { + output += ` | Error: ${entry.error.message}`; + } + + return output; + } + + private writeToFile(entry: LogEntry): void { + if (!this.config.enableFile) return; + + // Write to main log file (structured JSON) + const mainStream = this.logStreams.get('main'); + if (mainStream) { + mainStream.write(JSON.stringify(entry) + '\n'); + } + + // Write to error log file (human readable) + if (entry.level === LogLevel.ERROR) { + const errorStream = this.logStreams.get('error'); + if (errorStream) { + errorStream.write(this.formatPlainMessage(entry) + '\n'); + } + } + + // Write to request log file + if (entry.type === 'request') { + const requestStream = this.logStreams.get('request'); + if (requestStream) { + requestStream.write(this.formatPlainMessage(entry) + '\n'); + } + } + } + + private log( + level: LogLevel, + type: LogType, + message: string, + data?: any, + error?: Error, + requestId?: string, + ): void { + if (!this.shouldLog(level, type)) return; + + const entry: LogEntry = { + timestamp: this.formatTimestamp(), + level, + levelName: LogLevel[level], + type, + message, + requestId, + data, + error: error + ? 
{ + message: error.message, + stack: error.stack, + name: error.name, + } + : undefined, + }; + + // Console output + if (this.config.enableConsole) { + console.log(this.formatConsoleMessage(entry)); + } + + // File output + this.writeToFile(entry); + } + + // Public logging methods + public debug(type: LogType, message: string, data?: any, requestId?: string): void { + this.log(LogLevel.DEBUG, type, message, data, undefined, requestId); + } + + public info(type: LogType, message: string, data?: any, requestId?: string): void { + this.log(LogLevel.INFO, type, message, data, undefined, requestId); + } + + public warn(type: LogType, message: string, data?: any, requestId?: string): void { + this.log(LogLevel.WARN, type, message, data, undefined, requestId); + } + + public error( + type: LogType, + message: string, + error?: Error, + data?: any, + requestId?: string, + ): void { + this.log(LogLevel.ERROR, type, message, data, error, requestId); + } + + // Specialised logging methods for common patterns + public request( + method: string, + path: string, + statusCode: number, + responseTime: number, + requestId: string, + additionalData?: any, + ): void { + const message = `${method} ${path} (${statusCode}) ${responseTime}ms`; + this.log(LogLevel.INFO, 'request', message, additionalData, undefined, requestId); + } + + public worker(message: string, data?: any, requestId?: string): void { + this.log(LogLevel.DEBUG, 'worker', message, data, undefined, requestId); + } + + public workerInfo(message: string, data?: any, requestId?: string): void { + this.log(LogLevel.INFO, 'worker', message, data, undefined, requestId); + } + + public workerError(message: string, error?: Error, data?: any, requestId?: string): void { + this.log(LogLevel.ERROR, 'worker', message, data, error, requestId); + } + + public database(message: string, data?: any, requestId?: string): void { + this.log(LogLevel.DEBUG, 'database', message, data, undefined, requestId); + } + + public 
databaseError(message: string, error?: Error, data?: any, requestId?: string): void { + this.log(LogLevel.ERROR, 'database', message, data, error, requestId); + } + + public model(message: string, data?: any, requestId?: string): void { + this.log(LogLevel.DEBUG, 'model', message, data, undefined, requestId); + } + + public modelInfo(message: string, data?: any, requestId?: string): void { + this.log(LogLevel.INFO, 'model', message, data, undefined, requestId); + } + + // Utility methods + public generateRequestId(): string { + return `req_${Date.now()}_${randomUUID().slice(0, 8)}`; + } + + public close(): void { + for (const stream of this.logStreams.values()) { + stream.end(); + } + this.logStreams.clear(); + } + + // Update configuration + public updateConfig(newConfig: Partial): void { + this.config = { ...this.config, ...newConfig }; + + // Reinitialise streams if file logging config changed + if (newConfig.enableFile !== undefined || newConfig.logDir) { + this.close(); + this.initialiseLogStreams(); + } + } +} + +// Export singleton instance getter +export const getLogger = () => Logger.getInstance(); diff --git a/src/competence-matcher/src/utils/model.ts b/src/competence-matcher/src/utils/model.ts index 31635ea16..d7979a24f 100644 --- a/src/competence-matcher/src/utils/model.ts +++ b/src/competence-matcher/src/utils/model.ts @@ -53,9 +53,9 @@ export abstract class TransformerPipeline { // mark it as loaded and log on first load if (!this.loaded && isMainThread) { - if (config.verbose) { - console.log(`[Model-Pipeline] ${model} (${task}) is ready`); - } + const { getLogger } = require('./logger'); + const logger = getLogger(); + logger.modelInfo(`Model pipeline ${model} (${task}) is ready`); this.loaded = true; } } diff --git a/src/competence-matcher/src/utils/ollama.ts b/src/competence-matcher/src/utils/ollama.ts index 089bc3f08..712209f6f 100644 --- a/src/competence-matcher/src/utils/ollama.ts +++ b/src/competence-matcher/src/utils/ollama.ts @@ -1,8 +1,18 
@@ import { Ollama } from 'ollama'; import { config } from '../config'; import { OllamaConnectionError } from './errors'; +import { getLogger } from './logger'; -const { ollamaPath, splittingModel, reasonModel, ollamaBearerToken, verbose } = config; +const { ollamaPath, splittingModel, reasonModel, ollamaBearerToken } = config; + +// Lazy logger initialization to avoid module loading order issues +let logger: ReturnType | null = null; +const getLoggerInstance = () => { + if (!logger) { + logger = getLogger(); + } + return logger; +}; export const ollama = new Ollama({ host: ollamaPath, @@ -21,18 +31,17 @@ export const ollama = new Ollama({ export async function ensureAllOllamaModelsAreAvailable() { const models = [splittingModel, reasonModel]; - if (verbose) { - console.log(`[Ollama] Checking availability of models: ${models.join(', ')}`); - } + getLoggerInstance().debug( + 'model', + `Checking availability of Ollama models: ${models.join(', ')}`, + ); let availableModels: string[]; try { const modelList = await ollama.list(); availableModels = modelList.models.map((model) => model.model); - if (verbose) { - console.log(`[Ollama] Available models: ${availableModels.join(', ')}`); - } + getLoggerInstance().debug('model', `Available Ollama models: ${availableModels.join(', ')}`); } catch (error) { throw new OllamaConnectionError( ollamaPath, @@ -43,9 +52,7 @@ export async function ensureAllOllamaModelsAreAvailable() { for (const model of models) { if (!availableModels.includes(model)) { - if (verbose) { - console.log(`[Ollama] Model '${model}' not found, attempting to pull...`); - } + getLoggerInstance().info('model', `Ollama model '${model}' not found, attempting to pull...`); try { const modelpull = await ollama.pull({ @@ -63,9 +70,7 @@ export async function ensureAllOllamaModelsAreAvailable() { ); } - if (verbose) { - console.log(`[Ollama] Successfully pulled model '${model}'`); - } + getLoggerInstance().info('model', `Successfully pulled Ollama model 
'${model}'`); } catch (error) { // If the pull takes too long and the ollama is behind a proxy, it can timeout (504 as response code) // In this case, we just recheck the model availability @@ -80,13 +85,12 @@ export async function ensureAllOllamaModelsAreAvailable() { ); } } else { - if (verbose) { - console.log(`[Ollama] Model '${model}' is available (already downloaded)`); - } + getLoggerInstance().debug( + 'model', + `Ollama model '${model}' is available (already downloaded)`, + ); } } - if (verbose) { - console.log('[Ollama] All required Ollama-Models models are available'); - } + getLoggerInstance().modelInfo('All required Ollama models are available'); } diff --git a/src/competence-matcher/src/utils/types.ts b/src/competence-matcher/src/utils/types.ts index 98bd306b3..2a708da93 100644 --- a/src/competence-matcher/src/utils/types.ts +++ b/src/competence-matcher/src/utils/types.ts @@ -1,5 +1,47 @@ import { PretrainedModelOptions } from '@huggingface/transformers'; +// ===== LOGGING TYPES ===== + +// Logger levels in order of severity +export enum LogLevel { + DEBUG = 0, + INFO = 1, + WARN = 2, + ERROR = 3, +} + +// Logger types for different components +export type LogType = 'server' | 'request' | 'worker' | 'database' | 'model' | 'system'; + +// Log entry structure +export interface LogEntry { + timestamp: string; + level: LogLevel; + levelName: string; + type: LogType; + message: string; + context?: string; + requestId?: string; + data?: any; + error?: { + message: string; + stack?: string; + name?: string; + }; +} + +// Logger configuration +export interface LoggerConfig { + level: LogLevel; + enabledTypes: LogType[]; + enableConsole: boolean; + enableFile: boolean; + logDir: string; + colorize: boolean; +} + +// ===== COMPETENCE MATCHING TYPES ===== + export type Competence = { listId: string; // UUIDString resourceId: string; // UUIDString @@ -144,22 +186,3 @@ export interface TransformerPipelineOptions { model: string; options?: PretrainedModelOptions; } 
- -export interface LogEntry { - timestamp: string; - requestId: string; - type: 'request' | 'response' | 'error'; - method?: string; - path?: string; - query?: object; - body?: any; - headers?: object; - params?: object; - ip?: string; - realIp?: string | string[]; - statusCode?: number; - responseTime?: number; - error?: string; - errorStack?: string; - context?: string; -} diff --git a/src/competence-matcher/src/utils/worker.ts b/src/competence-matcher/src/utils/worker.ts index dc65c7bbf..1d894283b 100644 --- a/src/competence-matcher/src/utils/worker.ts +++ b/src/competence-matcher/src/utils/worker.ts @@ -4,9 +4,27 @@ import { Worker, parentPort } from 'worker_threads'; import VectorDataBase from '../db/db'; import { getDB } from './db'; import { config } from '../config'; +import { getLogger, createLoggerConfig } from './logger'; const { maxJobTime } = config; +// Initialise logger for worker if not already done +let logger: ReturnType | null = null; + +function ensureLogger() { + if (!logger) { + try { + logger = getLogger(); + } catch (error) { + // Logger not initialised yet, initialise it + const { getLogger: initLogger } = require('./logger'); + initLogger(createLoggerConfig()); + logger = getLogger(); + } + } + return logger; +} + export function createWorker(filename: string): Worker { const tsPath = path.resolve(__dirname, `../worker/${filename}.ts`); const jsPath = path.resolve(__dirname, `../worker/${filename}.js`); @@ -77,3 +95,172 @@ export async function withJobUpdates( export function log(...args: any[]) { parentPort?.postMessage({ type: 'log', message: args.map(String).join(' ') }); } + +/** + * Context manager for propagating request IDs through worker threads + */ +export class WorkerContext { + private static contexts: Map = new Map(); // jobId -> requestId + + /** + * Set context for a job + */ + static setContext(jobId: string, requestId: string): void { + this.contexts.set(jobId, requestId); + ensureLogger().debug('worker', `Context set for 
job ${jobId}`, { requestId }); + } + + /** + * Get context for a job + */ + static getContext(jobId: string): string | undefined { + return this.contexts.get(jobId); + } + + /** + * Remove context for a completed job + */ + static clearContext(jobId: string): void { + const removed = this.contexts.delete(jobId); + if (removed) { + ensureLogger().debug('worker', `Context cleared for job ${jobId}`); + } + } + + /** + * Log worker activity with proper context + */ + static logWorker( + level: 'debug' | 'info' | 'warn' | 'error', + jobId: string, + message: string, + data?: any, + error?: Error, + ): void { + const requestId = this.getContext(jobId); + + switch (level) { + case 'debug': + ensureLogger().worker(message, { jobId, ...data }, requestId); + break; + case 'info': + ensureLogger().workerInfo(message, { jobId, ...data }, requestId); + break; + case 'warn': + ensureLogger().warn('worker', message, { jobId, ...data }, requestId); + break; + case 'error': + ensureLogger().workerError(message, error, { jobId, ...data }, requestId); + break; + } + } + + /** + * Enhanced worker creation with context support + */ + static createWorkerWithContext( + filename: string, + jobId: string, + requestId: string, + workerData?: any, + ): Worker { + this.setContext(jobId, requestId); + + const worker = createWorker(filename); + + // Log worker lifecycle events + worker.on('online', () => { + this.logWorker('debug', jobId, `Worker ${worker.threadId} started`); + }); + + worker.on('exit', (code) => { + this.logWorker('debug', jobId, `Worker ${worker.threadId} exited with code ${code}`); + this.clearContext(jobId); + }); + + worker.on('error', (error) => { + this.logWorker('error', jobId, `Worker ${worker.threadId} error`, undefined, error); + this.clearContext(jobId); + }); + + return worker; + } +} + +/** + * Enhanced worker message handling with logging + */ +export function handleWorkerMessage( + worker: Worker, + jobId: string, + onMessage: (message: any) => void, + onError?: 
(error: Error) => void, +): void { + worker.on('message', (message) => { + WorkerContext.logWorker('debug', jobId, 'Received message from worker', { + threadId: worker.threadId, + messageType: message.type || 'unknown', + }); + onMessage(message); + }); + + worker.on('error', (error) => { + WorkerContext.logWorker( + 'error', + jobId, + 'Worker error occurred', + { + threadId: worker.threadId, + }, + error, + ); + if (onError) { + onError(error); + } + }); +} + +/** + * Utility to log from within worker threads + * This should be used in worker files to maintain context + */ +export function workerLogger( + type: string, + level: 'debug' | 'info' | 'warn' | 'error', + message: string, + data?: any, + error?: Error, +): void { + // In worker threads, we'll post messages back to main thread for logging + // This ensures all logs go through the central logger + const logData = { + type: 'log', + level, + logType: type, // Add type field for consistency with main logger + message, + data, + error: error + ? 
{ + message: error.message, + stack: error.stack, + name: error.name, + } + : undefined, + timestamp: new Date().toISOString(), + threadId: process.env.WORKER_THREAD_ID || 'unknown', + }; + + // If we're in a worker thread, post message to parent + if (typeof process !== 'undefined' && process.send) { + process.send(logData); + } + // For Node.js worker threads, we use parentPort + try { + const { parentPort } = require('worker_threads'); + if (parentPort) { + parentPort.postMessage(logData); + } + } catch { + // Fallback - not in worker thread context + } +} diff --git a/src/competence-matcher/src/worker/embedder.ts b/src/competence-matcher/src/worker/embedder.ts index aaf8d8dfc..034a4d299 100644 --- a/src/competence-matcher/src/worker/embedder.ts +++ b/src/competence-matcher/src/worker/embedder.ts @@ -1,11 +1,17 @@ -import { parentPort, threadId } from 'worker_threads'; +import { parentPort, threadId, workerData } from 'worker_threads'; import Embedding from '../tasks/embedding'; import { splitSemantically } from '../tasks/semantic-split'; import { withJobUpdates } from '../utils/worker'; -import { config } from '../config'; import { EmbeddingJob } from '../utils/types'; +import { workerLogger } from '../utils/worker'; +import { getLogger, createLoggerConfig, Logger } from '../utils/logger'; -const { verbose } = config; +// Initialise logger for this worker thread +try { + Logger.getInstance(createLoggerConfig()); +} catch (error) { + // Logger already initialised +} /** * New embedder worker that stays alive and processes jobs sequentially @@ -18,9 +24,11 @@ if (!parentPort) { parentPort.on('message', async (message: any) => { // Handle health checks with highest priority if (message?.type === 'health_check') { - if (verbose) { - console.log(`[Embedder Worker] Thread ${threadId} received health check ${message.checkId}`); - } + workerLogger('system', 'debug', `Health check received: ${message.checkId}`, { + threadId, + checkId: message.checkId, + }); + 
parentPort!.postMessage({ type: 'health_check_response', checkId: message.checkId, @@ -28,9 +36,10 @@ parentPort.on('message', async (message: any) => { workerType: 'embedder', threadId: threadId, }); - if (verbose) { - console.log(`[Embedder Worker] Thread ${threadId} sent health check response`); - } + + workerLogger('system', 'debug', `Health check response sent: ${message.checkId}`, { + threadId, + }); return; } @@ -40,11 +49,10 @@ parentPort.on('message', async (message: any) => { // Set global job context for logging (global as any).CURRENT_JOB = job.jobId; - if (verbose) { - console.log( - `[Embedder Worker] Received and starting job ${job.jobId} with ${job.tasks.length} tasks`, - ); - } + workerLogger(job.jobId, 'info', `Starting embedding job with ${job.tasks.length} tasks`, { + threadId, + taskCount: job.tasks.length, + }); try { await withJobUpdates(job, async (db, { tasks, jobId }) => { @@ -60,9 +68,11 @@ parentPort.on('message', async (message: any) => { // Generate embedding for the text const [vector] = await Embedding.embed(text); - if (verbose) { - console.log(`[Embedder Worker] Generated embedding for ${type} text (job ${jobId})`); - } + workerLogger(jobId, 'debug', `Generated embedding for ${type} text`, { + threadId, + competenceId, + textLength: text.length, + }); // Store embedding in database db.upsertEmbedding({ @@ -75,6 +85,18 @@ parentPort.on('message', async (message: any) => { }); } catch (error) { // Log the error but continue with other tasks + workerLogger( + jobId, + 'error', + `Failed to process embedding task`, + { + threadId, + competenceId, + type, + }, + error instanceof Error ? 
error : new Error(String(error)), + ); + parentPort!.postMessage({ type: 'error', jobId, @@ -90,11 +112,22 @@ parentPort.on('message', async (message: any) => { jobId: job.jobId, }); - if (verbose) { - console.log(`[Embedder Worker] Completed job ${job.jobId}`); - } + workerLogger(job.jobId, 'info', `Embedding job completed`, { + threadId, + taskCount: job.tasks.length, + }); } catch (error) { // Handle job-level errors + workerLogger( + job.jobId, + 'error', + `Embedding job failed`, + { + threadId, + }, + error instanceof Error ? error : new Error(String(error)), + ); + parentPort!.postMessage({ type: 'error', jobId: job.jobId, @@ -109,6 +142,6 @@ parentPort.on('message', async (message: any) => { } }); -if (verbose) { - console.log(`[Embedder Worker] Worker thread ${threadId} ready to process embedding jobs`); -} +workerLogger('system', 'info', `Embedder worker thread ready`, { + threadId, +}); diff --git a/src/competence-matcher/src/worker/matcher-new.ts b/src/competence-matcher/src/worker/matcher-new.ts deleted file mode 100644 index e69de29bb..000000000 diff --git a/src/competence-matcher/src/worker/matcher.ts b/src/competence-matcher/src/worker/matcher.ts index 78a487999..c3f40c005 100644 --- a/src/competence-matcher/src/worker/matcher.ts +++ b/src/competence-matcher/src/worker/matcher.ts @@ -4,9 +4,16 @@ import { withJobUpdates } from '../utils/worker'; import { addReason } from '../tasks/reason'; import { Match, MatchingJob } from '../utils/types'; import ZeroShot from '../tasks/semantic-zeroshot'; -import { config } from '../config'; +import { Logger, createLoggerConfig } from '../utils/logger'; -const { verbose } = config; +// Initialise logger for this worker thread +try { + Logger.getInstance(createLoggerConfig()); +} catch (error) { + // Logger already initialised +} + +// Note: Verbose logging has been replaced with the new logger system /** * New matcher worker that stays alive and processes jobs sequentially @@ -19,9 +26,6 @@ if (!parentPort) { 
parentPort.on('message', async (message: any) => { // Handle health checks with highest priority if (message?.type === 'health_check') { - if (verbose) { - console.log(`[Matcher Worker] Thread ${threadId} received health check ${message.checkId}`); - } parentPort!.postMessage({ type: 'health_check_response', checkId: message.checkId, @@ -29,9 +33,7 @@ parentPort.on('message', async (message: any) => { workerType: 'matcher', threadId: threadId, }); - if (verbose) { - console.log(`[Matcher Worker] Thread ${threadId} sent health check response`); - } + return; } @@ -41,12 +43,6 @@ parentPort.on('message', async (message: any) => { // Set global job context for logging (global as any).CURRENT_JOB = job.jobId; - if (verbose) { - console.log( - `[Matcher Worker] Received and starting job ${job.jobId} with ${job.tasks.length} tasks`, - ); - } - try { // Store match results for reasoning workaround const matchResults: { [description: string]: any[] } = {}; @@ -65,9 +61,6 @@ parentPort.on('message', async (message: any) => { const { taskId, name, description, executionInstructions, requiredCompetencies } = task; if (!description) { - if (verbose) { - console.log(`[Matcher Worker] Skipping task ${taskId} - no description provided`); - } continue; // Skip tasks without description } @@ -77,12 +70,6 @@ parentPort.on('message', async (message: any) => { // and passing the embedding directly, but for now we keep the same approach const [vector] = await Embedding.embed(description); - if (verbose) { - console.log( - `[Matcher Worker] Generated task embedding for job ${jobId}, task ${taskId}`, - ); - } - // Search for matches in the competence database let matches: Match[] = db.searchEmbedding(vector, { filter: { @@ -91,12 +78,6 @@ parentPort.on('message', async (message: any) => { }, }); - if (verbose) { - console.log( - `[Matcher Worker] Found ${matches.length} initial matches for task ${taskId}`, - ); - } - // TODO: Re-enable reasoning once worker stability issues are resolved 
// Apply reasoning to each match to enhance context // matches = await addReason(description, matches); @@ -184,10 +165,6 @@ parentPort.on('message', async (message: any) => { type: 'job_completed', jobId: job.jobId, }); - - if (verbose) { - console.log(`[Matcher Worker] Completed job ${job.jobId}`); - } } catch (error) { // Handle job-level errors parentPort!.postMessage({ @@ -203,7 +180,3 @@ parentPort.on('message', async (message: any) => { }); } }); - -if (verbose) { - console.log(`[Matcher Worker] Worker thread ${threadId} ready to process matching jobs`); -} diff --git a/src/competence-matcher/src/worker/test.ts b/src/competence-matcher/src/worker/test.ts deleted file mode 100644 index fde1d468a..000000000 --- a/src/competence-matcher/src/worker/test.ts +++ /dev/null @@ -1,9 +0,0 @@ -import { parentPort } from 'worker_threads'; -import { splitSemantically } from '../tasks/semantic-split'; -import { EmbeddingJob } from '../utils/types'; - -parentPort!.once('message', async (job: EmbeddingJob) => { - const { tasks, jobId } = job; - parentPort!.postMessage({ type: 'status', jobId, status: 'running' }); - parentPort!.postMessage(await splitSemantically(tasks)); -}); diff --git a/src/competence-matcher/src/worker/worker-manager.ts b/src/competence-matcher/src/worker/worker-manager.ts index 1ec5b0c31..1bb122e67 100644 --- a/src/competence-matcher/src/worker/worker-manager.ts +++ b/src/competence-matcher/src/worker/worker-manager.ts @@ -3,11 +3,18 @@ import { config } from '../config'; import { createWorker } from '../utils/worker'; import { EmbeddingJob, MatchingJob, workerTypes, Match } from '../utils/types'; import { WorkerError, DatabaseError, ReasoningError } from '../utils/errors'; -import { logError } from '../middleware/logging'; +import { getLogger } from '../utils/logger'; import { addReason } from '../tasks/reason'; import { getDB } from '../utils/db'; -const { verbose, embeddingWorkers, matchingWorkers } = config; +const { + embeddingWorkers, + 
matchingWorkers, + modelLoadingTimeout, + maxWorkerRetries, + workerRetryWindow, +} = config; +const logger = getLogger(); // Job queue interface for task-specific queues interface JobQueueItem { @@ -31,6 +38,11 @@ class WorkerPool { private pendingHealthChecks: Map = new Map(); // Track pending health checks private beingReplaced: Set = new Set(); // Track workers being replaced to prevent double replacement + // Retry tracking + private failureCount: number = 0; // Total failures for this worker type + private lastFailureTime: number = 0; // Timestamp of last failure + private consecutiveFailures: number = 0; // Consecutive failures without recovery + constructor(workerType: workerTypes, poolSize: number) { this.workerType = workerType; this.poolSize = poolSize; @@ -45,9 +57,7 @@ class WorkerPool { this.createAndRegisterWorker(); } - if (verbose) { - console.log(`[WorkerPool] Initialised ${this.poolSize} ${this.workerType} workers`); - } + logger.debug('system', `[WorkerPool] Initialised ${this.poolSize} ${this.workerType} workers`); } /** @@ -56,11 +66,10 @@ class WorkerPool { private createAndRegisterWorker() { // Check if we already have enough workers (including those pending health checks) if (this.workers.length >= this.poolSize) { - if (verbose) { - console.log( - `[WorkerPool] Not creating new ${this.workerType} worker - pool already has ${this.workers.length}/${this.poolSize} workers`, - ); - } + logger.debug( + 'system', + `[WorkerPool] Not creating new ${this.workerType} worker - pool already has ${this.workers.length}/${this.poolSize} workers`, + ); return; } @@ -74,7 +83,7 @@ class WorkerPool { error instanceof Error ? 
error : new Error(String(error)), ); - logError(workerError, 'worker_pool_creation_failure', undefined, { + logger.workerError('worker_pool_creation_failure', workerError, { workerType: this.workerType, poolSize: this.poolSize, }); @@ -91,9 +100,7 @@ class WorkerPool { // Handle worker lifecycle events worker.once('online', () => { - if (verbose) { - console.log(`[WorkerPool] ${this.workerType} worker ${worker.threadId} is online`); - } + logger.debug('system', `[WorkerPool] ${this.workerType} worker ${worker.threadId} is online`); }); worker.on('error', (err) => { @@ -104,11 +111,26 @@ class WorkerPool { err instanceof Error ? err : new Error(String(err)), ); - logError(workerError, 'worker_pool_runtime_error', undefined, { + // Track failure and determine logging severity + const { shouldLogAsError, retryCount } = this.trackWorkerFailure(); + + const errorData = { workerType: this.workerType, threadId: worker.threadId, jobId, - }); + retryCount, + maxRetries: maxWorkerRetries, + }; + + if (shouldLogAsError) { + logger.workerError('worker_pool_runtime_error', workerError, errorData); + } else { + logger.warn( + 'worker', + `Worker runtime error (attempt ${retryCount}/${maxWorkerRetries}): ${err.message}`, + errorData, + ); + } // Mark worker as available again and try to process next job this.markWorkerAvailable(worker); @@ -119,19 +141,17 @@ class WorkerPool { worker.once('exit', (code) => { const jobId = this.busyWorkers.get(worker) || 'unknown'; - if (verbose) { - console.log( - `[WorkerPool] ${this.workerType} worker ${worker.threadId} exited with code ${code}`, - ); - } + logger.debug( + 'system', + `[WorkerPool] ${this.workerType} worker ${worker.threadId} exited with code ${code}`, + ); // Check if this worker is already being replaced to prevent double replacement if (this.beingReplaced.has(worker)) { - if (verbose) { - console.log( - `[WorkerPool] Worker ${worker.threadId} already being replaced, skipping duplicate replacement`, - ); - } + logger.debug( + 
'system', + `[WorkerPool] Worker ${worker.threadId} already being replaced, skipping duplicate replacement`, + ); this.removeWorkerFromPool(worker); return; } @@ -148,11 +168,10 @@ class WorkerPool { public enqueue(job: EmbeddingJob | MatchingJob, options?: JobQueueItem['options']) { this.queue.push({ job, options }); - if (verbose) { - console.log( - `[WorkerPool] Enqueued ${this.workerType} job ${job.jobId} (queue: ${this.queue.length}, available: ${this.availableWorkers.size})`, - ); - } + logger.debug( + 'system', + `[WorkerPool] Enqueued ${this.workerType} job ${job.jobId} (queue: ${this.queue.length}, available: ${this.availableWorkers.size})`, + ); this.processNextJob(); } @@ -179,32 +198,29 @@ class WorkerPool { this.availableWorkers.delete(worker); this.busyWorkers.set(worker, job.jobId); - if (verbose) { - console.log( - `[WorkerPool] Assigning ${this.workerType} job ${job.jobId} to worker ${worker.threadId}`, - ); - } + logger.debug( + 'system', + `[WorkerPool] Assigning ${this.workerType} job ${job.jobId} to worker ${worker.threadId}`, + ); // Set up message handling for this specific job const messageHandler = (message: any) => { try { switch (message.type) { case 'status': - if (verbose) { - console.log( - `[WorkerPool] Worker ${worker.threadId} for job ${message.jobId || job.jobId} status: ${message.status}`, - ); - } + logger.debug( + 'system', + `[WorkerPool] Worker ${worker.threadId} for job ${message.jobId || job.jobId} status: ${message.status}`, + ); break; case 'error': - logError( + logger.workerError( + 'worker_reported_error', new WorkerError( this.workerType, message.jobId || job.jobId, new Error(message.error), ), - 'worker_reported_error', - undefined, { workerType: this.workerType, threadId: worker.threadId, @@ -214,9 +230,30 @@ class WorkerPool { ); break; case 'log': - if (verbose) { - console.log( - `[WorkerPool] Worker ${worker.threadId} for job ${message.jobId || job.jobId} log: ${message.message}`, + // Forward worker logs to main 
logger + try { + const logType = message.logType || 'worker'; + switch (message.level) { + case 'debug': + logger.debug(logType, message.message, message.data); + break; + case 'info': + logger.info(logType, message.message, message.data); + break; + case 'warn': + logger.warn(logType, message.message, message.data); + break; + case 'error': + const error = message.error ? new Error(message.error.message) : undefined; + if (error && message.error.stack) error.stack = message.error.stack; + logger.error(logType, message.message, error, message.data); + break; + } + } catch (err) { + logger.error( + 'system', + 'Failed to forward worker log', + err instanceof Error ? err : new Error(String(err)), ); } break; @@ -231,10 +268,9 @@ class WorkerPool { if (message.job === 'reason') { // Handle reasoning asynchronously without blocking the worker handleReasoning(job, message).catch((error: any) => { - logError( - error instanceof Error ? error : new Error(String(error)), + logger.workerError( 'reasoning_handler_async_failure', - undefined, + error instanceof Error ? error : new Error(String(error)), { jobId: job.jobId }, ); }); @@ -250,7 +286,7 @@ class WorkerPool { error instanceof Error ? error : new Error(String(error)), ); - logError(messageHandlingError, 'worker_message_handling_error', undefined, { + logger.workerError('worker_message_handling_error', messageHandlingError, { workerType: this.workerType, threadId: worker.threadId, jobId: job.jobId, @@ -272,7 +308,7 @@ class WorkerPool { error instanceof Error ? 
error : new Error(String(error)), ); - logError(messageError, 'worker_message_send_failure', undefined, { + logger.workerError('worker_message_send_failure', messageError, { workerType: this.workerType, threadId: worker.threadId, jobId: job.jobId, @@ -291,26 +327,40 @@ class WorkerPool { */ private performHealthCheck(worker: Worker) { const healthCheckId = `health_check_${Date.now()}_${worker.threadId}`; - const timeout = 20000; // 20 seconds timeout for model loading + const timeout = modelLoadingTimeout * 1_000; // convert to milliseconds - if (verbose) { - console.log( - `[WorkerPool] Performing health check on ${this.workerType} worker ${worker.threadId}`, - ); - } + logger.debug( + 'system', + `[WorkerPool] Performing health check on ${this.workerType} worker ${worker.threadId}`, + ); // Set up timeout for health check const healthCheckTimeout = setTimeout(() => { - logError( - new WorkerError(this.workerType, healthCheckId, new Error('Health check timeout')), - 'worker_health_check_timeout', - undefined, - { - workerType: this.workerType, - threadId: worker.threadId, - timeout, - }, + // Track failure and determine logging severity + const { shouldLogAsError, retryCount } = this.trackWorkerFailure(); + + const error = new WorkerError( + this.workerType, + healthCheckId, + new Error('Health check timeout'), ); + const errorData = { + workerType: this.workerType, + threadId: worker.threadId, + timeout, + retryCount, + maxRetries: maxWorkerRetries, + }; + + if (shouldLogAsError) { + logger.workerError('worker_health_check_timeout', error, errorData); + } else { + logger.warn( + 'worker', + `Worker health check timeout (attempt ${retryCount}/${maxWorkerRetries})`, + errorData, + ); + } // Clean up pending health check this.pendingHealthChecks.delete(worker); @@ -321,15 +371,16 @@ class WorkerPool { // Terminate unresponsive worker explicitly try { worker.terminate(); - if (verbose) { - console.log( - `[WorkerPool] Terminated unresponsive ${this.workerType} worker 
${worker.threadId}`, - ); - } + logger.debug( + 'system', + `[WorkerPool] Terminated unresponsive ${this.workerType} worker ${worker.threadId}`, + ); } catch (error) { - if (verbose) { - console.log(`[WorkerPool] Failed to terminate worker ${worker.threadId}:`, error); - } + logger.debug( + 'system', + `[WorkerPool] Failed to terminate worker ${worker.threadId}:`, + error, + ); } // Remove unresponsive worker and create a replacement @@ -355,11 +406,13 @@ class WorkerPool { // NOW mark worker as available since it passed health check this.availableWorkers.add(worker); - if (verbose) { - console.log( - `[WorkerPool] ${this.workerType} worker ${worker.threadId} passed health check and is now available`, - ); - } + // Reset failure tracking on successful recovery + this.resetFailureTracking(); + + logger.debug( + 'system', + `[WorkerPool] ${this.workerType} worker ${worker.threadId} passed health check and is now available`, + ); // Process any queued jobs now that we have an available worker this.processNextJob(); @@ -385,14 +438,13 @@ class WorkerPool { worker.off('message', healthCheckHandler); - logError( + logger.workerError( + 'worker_health_check_send_failure', new WorkerError( this.workerType, healthCheckId, error instanceof Error ? 
error : new Error(String(error)), ), - 'worker_health_check_send_failure', - undefined, { workerType: this.workerType, threadId: worker.threadId, @@ -430,12 +482,48 @@ class WorkerPool { } catch (error) { // Ignore termination errors } - } /** + } + + /** + * Track a worker failure and determine appropriate logging level + */ + private trackWorkerFailure(): { shouldLogAsError: boolean; retryCount: number } { + const now = Date.now(); + + // Reset consecutive failures if enough time has passed since last failure + if (now - this.lastFailureTime > workerRetryWindow) { + this.consecutiveFailures = 0; + } + + this.failureCount++; + this.consecutiveFailures++; + this.lastFailureTime = now; + + // Log as ERROR only if we've exceeded the max retries + const shouldLogAsError = this.consecutiveFailures > maxWorkerRetries; + + return { + shouldLogAsError, + retryCount: this.consecutiveFailures, + }; + } + + /** + * Reset failure tracking when workers recover successfully + */ + private resetFailureTracking() { + this.consecutiveFailures = 0; + } + + /** * Mark a worker as available for new jobs */ private markWorkerAvailable(worker: Worker) { this.busyWorkers.delete(worker); this.availableWorkers.add(worker); + + // Reset failure tracking on successful job completion + this.resetFailureTracking(); } /** @@ -456,9 +544,7 @@ class WorkerPool { * Shutdown all workers in this pool */ public async shutdown() { - if (verbose) { - console.log(`[WorkerPool] Shutting down ${this.workerType} worker pool`); - } + logger.debug('system', `[WorkerPool] Shutting down ${this.workerType} worker pool`); const shutdownPromises = this.workers.map((worker) => { return new Promise((resolve) => { @@ -490,11 +576,10 @@ class WorkerManager { this.embeddingPool = new WorkerPool('embedder', embeddingWorkers); this.matchingPool = new WorkerPool('matcher', matchingWorkers); - if (verbose) { - console.log( - `[WorkerManager] Initialised with ${embeddingWorkers} embedding workers and ${matchingWorkers} 
matching workers`, - ); - } + logger.debug( + 'system', + `[WorkerManager] Initialised with ${embeddingWorkers} embedding workers and ${matchingWorkers} matching workers`, + ); // Create ready promise this.readyPromise = this.waitForWorkersReady(); @@ -504,9 +589,7 @@ class WorkerManager { * Wait for all worker pools to have at least one available worker */ private async waitForWorkersReady(): Promise { - if (verbose) { - console.log('[WorkerManager] Waiting for worker pools to become ready...'); - } + logger.debug('system', '[WorkerManager] Waiting for worker pools to become ready...'); const maxWaitTime = 30_000; // 30 seconds max wait const checkInterval = 500; // Check every 500ms @@ -522,20 +605,20 @@ class WorkerManager { if (embeddingReady && matchingReady) { this.isReady = true; - if (verbose) { - console.log(`[WorkerManager] All worker pools ready: + logger.debug( + 'system', + `[WorkerManager] All worker pools ready: - Embedding workers: ${embeddingStats.totalWorkers} total, ${embeddingStats.availableWorkers} available - - Matching workers: ${matchingStats.totalWorkers} total, ${matchingStats.availableWorkers} available`); - } + - Matching workers: ${matchingStats.totalWorkers} total, ${matchingStats.availableWorkers} available`, + ); resolve(); } else if (Date.now() - startTime > maxWaitTime) { reject(new Error('Timeout waiting for worker pools to become ready')); } else { - if (verbose) { - console.log( - `[WorkerManager] Waiting for workers... Embedding: ${embeddingReady ? '✓' : '✗'}, Matching: ${matchingReady ? '✓' : '✗'}`, - ); - } + logger.debug( + 'system', + `[WorkerManager] Waiting for workers... Embedding: ${embeddingReady ? '✓' : '✗'}, Matching: ${matchingReady ? 
'✓' : '✗'}`, + ); setTimeout(checkReady, checkInterval); } }; @@ -564,19 +647,20 @@ class WorkerManager { * @deprecated Use ready() promise instead */ private performInitialHealthCheck() { - if (verbose) { - console.log('[WorkerManager] Performing initial health check on all worker pools'); - } + logger.debug('system', '[WorkerManager] Performing initial health check on all worker pools'); const embeddingStats = this.embeddingPool.getStats(); const matchingStats = this.matchingPool.getStats(); - console.log(`[WorkerManager] Health check complete: + logger.debug( + 'system', + `[WorkerManager] Health check complete: - Embedding workers: ${embeddingStats.totalWorkers} total, ${embeddingStats.availableWorkers} available, ${embeddingStats.pendingHealthChecks} pending health checks - - Matching workers: ${matchingStats.totalWorkers} total, ${matchingStats.availableWorkers} available, ${matchingStats.pendingHealthChecks} pending health checks`); + - Matching workers: ${matchingStats.totalWorkers} total, ${matchingStats.availableWorkers} available, ${matchingStats.pendingHealthChecks} pending health checks`, + ); if (embeddingStats.totalWorkers === 0 || matchingStats.totalWorkers === 0) { - console.error('[WorkerManager] WARNING: Some worker pools have no active workers!'); + logger.error('system', '[WorkerManager] WARNING: Some worker pools have no active workers!'); } } @@ -622,9 +706,7 @@ class WorkerManager { * Shutdown all worker pools */ public async shutdown() { - if (verbose) { - console.log('[WorkerManager] Shutting down all worker pools'); - } + logger.debug('system', '[WorkerManager] Shutting down all worker pools'); await Promise.all([this.embeddingPool.shutdown(), this.matchingPool.shutdown()]); } @@ -638,9 +720,7 @@ async function handleReasoning(job: any, message: any) { const jobId = job.jobId || 'unknown'; try { - if (verbose) { - console.log(`[WorkerManager] Processing reasoning for job ${jobId}`); - } + logger.debug('system', `[WorkerManager] Processing 
reasoning for job ${jobId}`); const finalMatches = []; @@ -670,7 +750,7 @@ async function handleReasoning(job: any, message: any) { error instanceof Error ? error : new Error(String(error)), ); - logError(reasoningError, 'reasoning_task_failure', undefined, { + logger.workerError('reasoning_task_failure', reasoningError, { jobId, task: task.substring(0, 100) + (task.length > 100 ? '...' : ''), matchCount: (matches as any[]).length, @@ -715,11 +795,10 @@ async function handleReasoning(job: any, message: any) { try { db.updateJobStatus(job.jobId, 'completed'); - if (verbose) { - console.log( - `[WorkerManager] Job ${jobId} completed successfully with ${finalMatches.length} matches`, - ); - } + logger.debug( + 'system', + `[WorkerManager] Job ${jobId} completed successfully with ${finalMatches.length} matches`, + ); } catch (error) { throw new DatabaseError( 'updateJobStatus', @@ -727,10 +806,9 @@ async function handleReasoning(job: any, message: any) { ); } } catch (error) { - logError( - error instanceof Error ? error : new Error(String(error)), + logger.workerError( 'reasoning_handler_failure', - undefined, + error instanceof Error ? error : new Error(String(error)), { jobId }, ); @@ -739,13 +817,12 @@ async function handleReasoning(job: any, message: any) { const db = getDB(job.dbName); db.updateJobStatus(job.jobId, 'failed'); } catch (dbError) { - logError( + logger.workerError( + 'job_failure_update_error', new DatabaseError( 'updateJobStatus', dbError instanceof Error ? 
dbError : new Error(String(dbError)), ), - 'job_failure_update_error', - undefined, { jobId }, ); } From 671334da9f68cd66eba3f8c9f87c16742d8f6a08 Mon Sep 17 00:00:00 2001 From: Maxi <76120220+MaxiLein@users.noreply.github.com> Date: Thu, 21 Aug 2025 11:48:06 +0200 Subject: [PATCH 17/48] feat: Improve logging structure by creating date-specific log directories and updating log file paths --- src/competence-matcher/src/utils/logger.ts | 27 +++++++++++----------- 1 file changed, 14 insertions(+), 13 deletions(-) diff --git a/src/competence-matcher/src/utils/logger.ts b/src/competence-matcher/src/utils/logger.ts index 24d90e732..9c3d3265c 100644 --- a/src/competence-matcher/src/utils/logger.ts +++ b/src/competence-matcher/src/utils/logger.ts @@ -82,23 +82,24 @@ export class Logger { private initialiseLogStreams(): void { if (!this.config.enableFile) return; - // Ensure log directory exists - if (!fs.existsSync(this.config.logDir)) { - fs.mkdirSync(this.config.logDir, { recursive: true }); - } - const today = new Date().toISOString().split('T')[0]; + const dayLogDir = path.join(this.config.logDir, today); + + // Ensure date-specific log directory exists + if (!fs.existsSync(dayLogDir)) { + fs.mkdirSync(dayLogDir, { recursive: true }); + } - // Main log file (all logs) - const mainLogPath = path.join(this.config.logDir, `competence-matcher-${today}.json`); + // Main log file (all logs) - structured JSON + const mainLogPath = path.join(dayLogDir, 'competence-matcher.json'); this.logStreams.set('main', fs.createWriteStream(mainLogPath, { flags: 'a' })); - // Error-only log file - const errorLogPath = path.join(this.config.logDir, `errors-${today}.log`); + // Error-only log file - human readable + const errorLogPath = path.join(dayLogDir, 'errors.log'); this.logStreams.set('error', fs.createWriteStream(errorLogPath, { flags: 'a' })); - // Request-only log file - const requestLogPath = path.join(this.config.logDir, `requests-${today}.log`); + // Request-only log file - human 
readable + const requestLogPath = path.join(dayLogDir, 'requests.log'); this.logStreams.set('request', fs.createWriteStream(requestLogPath, { flags: 'a' })); } @@ -119,7 +120,7 @@ export class Logger { const typeColor = typeColors[entry.type]; const timestamp = `${colors.gray}[${entry.timestamp}]${colors.reset}`; const level = `${levelColor}${entry.levelName.padEnd(5)}${colors.reset}`; - const type = `${typeColor}[${entry.type}${entry.requestId ? `:${String(entry.requestId).slice(0, 8)}` : ''}]${colors.reset}`; + const type = `${typeColor}[${entry.type}${entry.requestId ? `:${String(entry.requestId).slice(-12)}` : ''}]${colors.reset}`; const message = entry.message; let output = `${timestamp} ${level} ${type} ${message}`; @@ -145,7 +146,7 @@ export class Logger { private formatPlainMessage(entry: LogEntry): string { const timestamp = `[${entry.timestamp}]`; const level = entry.levelName.padEnd(5); - const type = `[${entry.type}${entry.requestId ? `:${String(entry.requestId).slice(0, 8)}` : ''}]`; + const type = `[${entry.type}${entry.requestId ? 
`:${String(entry.requestId).slice(-12)}` : ''}]`; let output = `${timestamp} ${level} ${type} ${entry.message}`; From d05fa15f8db3c1a06f91dc7144b88ffea3cd4de3 Mon Sep 17 00:00:00 2001 From: Maxi <76120220+MaxiLein@users.noreply.github.com> Date: Thu, 21 Aug 2025 12:02:31 +0200 Subject: [PATCH 18/48] feat: Add retry logic and error handling for Ollama model pulling; enhance worker and error handler logging --- src/competence-matcher/src/config.ts | 3 + .../src/middleware/error-handler.ts | 40 ++-- src/competence-matcher/src/utils/ollama.ts | 215 ++++++++++++++++-- src/competence-matcher/src/utils/worker.ts | 4 +- 4 files changed, 221 insertions(+), 41 deletions(-) diff --git a/src/competence-matcher/src/config.ts b/src/competence-matcher/src/config.ts index 9b90a2f0d..1ea9a057d 100644 --- a/src/competence-matcher/src/config.ts +++ b/src/competence-matcher/src/config.ts @@ -31,4 +31,7 @@ export const config = { modelLoadingTimeout: parseInt(process.env.MODEL_LOADING_TIMEOUT || '20', 10), // Timeout for model loading in seconds maxWorkerRetries: parseInt(process.env.MAX_WORKER_RETRIES || '3', 10), // Maximum worker restart attempts before escalating to ERROR workerRetryWindow: parseInt(process.env.WORKER_RETRY_WINDOW || '300', 10) * 1_000, // Time window in seconds to reset retry count (converted to ms) + maxOllamaRetries: parseInt(process.env.MAX_OLLAMA_RETRIES || '5', 10), // Maximum model pull retry attempts + ollamaRetryDelay: parseInt(process.env.OLLAMA_RETRY_DELAY || '30', 10) * 1_000, // Base delay between retries in seconds (converted to ms) + ollamaRetryBackoff: parseFloat(process.env.OLLAMA_RETRY_BACKOFF || '1.5'), // Exponential backoff multiplier }; diff --git a/src/competence-matcher/src/middleware/error-handler.ts b/src/competence-matcher/src/middleware/error-handler.ts index 716de784c..5ded0a6d5 100644 --- a/src/competence-matcher/src/middleware/error-handler.ts +++ b/src/competence-matcher/src/middleware/error-handler.ts @@ -5,7 +5,7 @@ import { 
getLogger } from '../utils/logger'; const logger = getLogger(); /** - * Enhanced error handler middleware using the new logging system + * Error handler middleware using the logging system */ export function errorHandler( error: Error | CompetenceMatcherError, @@ -17,13 +17,19 @@ export function errorHandler( if (error instanceof CompetenceMatcherError) { // Handle our custom errors - logger.error('request', `${error.context}: ${error.message}`, error, { - statusCode: error.statusCode, - details: error.details, - path: req.path, - method: req.method, + logger.error( + 'request', + `${error.context}: ${error.message}`, + error, + { + statusCode: error.statusCode, + details: error.details, + path: req.path, + method: req.method, + requestId, + }, requestId, - }, requestId); + ); res.status(error.statusCode).json({ error: { @@ -35,14 +41,20 @@ export function errorHandler( }); } else { // Handle unexpected errors - logger.error('system', 'Unhandled error occurred', error, { - path: req.path, - method: req.method, - body: req.body, - query: req.query, - params: req.params, + logger.error( + 'system', + 'Unhandled error occurred', + error, + { + path: req.path, + method: req.method, + body: req.body, + query: req.query, + params: req.params, + requestId, + }, requestId, - }, requestId); + ); res.status(500).json({ error: { diff --git a/src/competence-matcher/src/utils/ollama.ts b/src/competence-matcher/src/utils/ollama.ts index 712209f6f..b1a9fe795 100644 --- a/src/competence-matcher/src/utils/ollama.ts +++ b/src/competence-matcher/src/utils/ollama.ts @@ -3,7 +3,15 @@ import { config } from '../config'; import { OllamaConnectionError } from './errors'; import { getLogger } from './logger'; -const { ollamaPath, splittingModel, reasonModel, ollamaBearerToken } = config; +const { + ollamaPath, + splittingModel, + reasonModel, + ollamaBearerToken, + maxOllamaRetries, + ollamaRetryDelay, + ollamaRetryBackoff, +} = config; // Lazy logger initialization to avoid module loading 
order issues let logger: ReturnType | null = null; @@ -22,6 +30,134 @@ export const ollama = new Ollama({ }, }); +/** + * Utility function to sleep for a given duration + */ +function sleep(ms: number): Promise { + return new Promise((resolve) => setTimeout(resolve, ms)); +} + +/** + * Check if an error is likely a timeout error from nginx proxy + */ +function isTimeoutError(error: any): boolean { + if (error instanceof OllamaConnectionError) { + const originalError = error.cause; + // Check for common timeout/proxy error indicators + if (originalError && typeof originalError === 'object') { + const errorString = String(originalError).toLowerCase(); + const errorMessage = (originalError as any).message?.toLowerCase() || ''; + + return ( + errorString.includes('504') || // Gateway timeout + errorString.includes('timeout') || // General timeout + errorString.includes('etimedout') || // Node.js timeout + errorString.includes('econnreset') || // Connection reset + errorMessage.includes('504') || + errorMessage.includes('timeout') || + errorMessage.includes('gateway timeout') + ); + } + } + return false; +} + +/** + * Get error message safely from unknown error type + */ +function getErrorMessage(error: unknown): string { + if (error instanceof Error) { + return error.message; + } + return String(error); +} + +/** + * Attempt to pull a model with retry logic for handling proxy timeouts + */ +async function pullModelWithRetry(modelName: string): Promise { + for (let attempt = 1; attempt <= maxOllamaRetries; attempt++) { + try { + getLoggerInstance().debug( + 'model', + `Attempting to pull Ollama model '${modelName}' (attempt ${attempt}/${maxOllamaRetries})`, + ); + + const modelpull = await ollama.pull({ + model: modelName, + insecure: false, + stream: false, + }); + + // Check if the model was successfully pulled + if (!modelpull || modelpull.status !== 'success') { + throw new OllamaConnectionError( + ollamaPath, + 'pull_model', + new Error(`Model pull failed: 
${modelpull?.status || 'Unknown error'}`), + ); + } + + getLoggerInstance().info('model', `Successfully pulled Ollama model '${modelName}'`); + return true; + } catch (error) { + const isTimeout = isTimeoutError(error); + const isLastAttempt = attempt === maxOllamaRetries; + + if (isTimeout && !isLastAttempt) { + // Log as warning for timeout errors that we'll retry + const delay = ollamaRetryDelay * Math.pow(ollamaRetryBackoff, attempt - 1); + getLoggerInstance().warn( + 'model', + `Ollama model pull timeout for '${modelName}' (attempt ${attempt}/${maxOllamaRetries}), retrying in ${Math.round(delay / 1000)}s...`, + { + modelName, + attempt, + maxRetries: maxOllamaRetries, + retryDelay: delay, + error: getErrorMessage(error), + }, + ); + + // Wait before retrying with exponential backoff + await sleep(delay); + continue; + } else if (isLastAttempt) { + // Final attempt failed - escalate to error + getLoggerInstance().error( + 'model', + `Failed to pull Ollama model '${modelName}' after ${maxOllamaRetries} attempts`, + error instanceof Error ? error : new Error(String(error)), + { + modelName, + totalAttempts: maxOllamaRetries, + finalError: getErrorMessage(error), + }, + ); + throw error; + } else { + // Non-timeout error on non-final attempt + getLoggerInstance().warn( + 'model', + `Ollama model pull error for '${modelName}' (attempt ${attempt}/${maxOllamaRetries}): ${getErrorMessage(error)}`, + { + modelName, + attempt, + maxRetries: maxOllamaRetries, + error: getErrorMessage(error), + }, + ); + + // Wait before retrying (shorter delay for non-timeout errors) + await sleep(Math.min(ollamaRetryDelay, 5000)); + continue; + } + } + } + + return false; // Should never reach here due to throw in loop +} + /** * Ensures that all required models are available by checking their existence * in the Ollama server. If a model is not available, it will be downloaded. 
@@ -55,34 +191,63 @@ export async function ensureAllOllamaModelsAreAvailable() { getLoggerInstance().info('model', `Ollama model '${model}' not found, attempting to pull...`); try { - const modelpull = await ollama.pull({ - model, - insecure: false, - stream: false, - }); - - // Check if the model was successfully pulled - if (!modelpull || modelpull.status !== 'success') { - throw new OllamaConnectionError( - ollamaPath, - 'pull_model', - new Error(`Model pull failed: ${modelpull?.status || 'Unknown error'}`), + await pullModelWithRetry(model); + + // After successful pull, re-check model availability to ensure it's actually there + getLoggerInstance().debug('model', `Re-checking availability of '${model}' after pull...`); + + try { + const updatedModelList = await ollama.list(); + const updatedAvailableModels = updatedModelList.models.map((m) => m.model); + + if (!updatedAvailableModels.includes(model)) { + throw new OllamaConnectionError( + ollamaPath, + 'verify_model', + new Error( + `Model '${model}' was reportedly pulled successfully but is not available in model list`, + ), + ); + } + + getLoggerInstance().debug('model', `Confirmed '${model}' is now available after pull`); + } catch (verifyError) { + getLoggerInstance().warn( + 'model', + `Failed to verify model '${model}' availability after pull: ${getErrorMessage(verifyError)}`, + { model, verifyError: getErrorMessage(verifyError) }, ); - } - getLoggerInstance().info('model', `Successfully pulled Ollama model '${model}'`); - } catch (error) { - // If the pull takes too long and the ollama is behind a proxy, it can timeout (504 as response code) - // In this case, we just recheck the model availability - // TODO: - if (error instanceof OllamaConnectionError) { - throw error; + // If verification fails, we still consider the pull successful if it didn't throw + // This handles cases where the model is actually available but listing fails } - throw new OllamaConnectionError( - ollamaPath, - 'pull_model', - 
error instanceof Error ? error : new Error(String(error)), + } catch (error) { + // Re-check one more time in case the model was actually pulled despite the error + getLoggerInstance().debug( + 'model', + `Pull failed for '${model}', performing final availability check...`, ); + + try { + const finalModelList = await ollama.list(); + const finalAvailableModels = finalModelList.models.map((m) => m.model); + + if (finalAvailableModels.includes(model)) { + getLoggerInstance().info( + 'model', + `Model '${model}' is now available despite pull error - continuing`, + ); + continue; // Model is actually available, continue to next model + } + } catch (listError) { + getLoggerInstance().debug( + 'model', + `Final model list check failed: ${getErrorMessage(listError)}`, + ); + } + + // Model is definitely not available, propagate the original error + throw error; } } else { getLoggerInstance().debug( diff --git a/src/competence-matcher/src/utils/worker.ts b/src/competence-matcher/src/utils/worker.ts index 1d894283b..2d3f93dd6 100644 --- a/src/competence-matcher/src/utils/worker.ts +++ b/src/competence-matcher/src/utils/worker.ts @@ -156,7 +156,7 @@ export class WorkerContext { } /** - * Enhanced worker creation with context support + * Worker creation with context support */ static createWorkerWithContext( filename: string, @@ -188,7 +188,7 @@ export class WorkerContext { } /** - * Enhanced worker message handling with logging + * Worker message handling with logging */ export function handleWorkerMessage( worker: Worker, From d1909e1253acac4c496b13fbe1042ff73f1f509a Mon Sep 17 00:00:00 2001 From: Maxi <76120220+MaxiLein@users.noreply.github.com> Date: Thu, 21 Aug 2025 16:39:51 +0200 Subject: [PATCH 19/48] docs: Add usage comment for optimum-cli export command in ONNX model script --- src/competence-matcher/tools/onnx-model-external-data.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/competence-matcher/tools/onnx-model-external-data.py 
b/src/competence-matcher/tools/onnx-model-external-data.py index f2c74c216..83e9c3ab2 100644 --- a/src/competence-matcher/tools/onnx-model-external-data.py +++ b/src/competence-matcher/tools/onnx-model-external-data.py @@ -2,6 +2,8 @@ import argparse import sys +# optimum-cli export onnx --model + def main(): parser = argparse.ArgumentParser(description="Convert ONNX model weights to external data format.") parser.add_argument('--input', '-i', required=True, help='Path to the input ONNX model') From 29ea6cfa2e7a5830f8b7e879a2b28231578f280b Mon Sep 17 00:00:00 2001 From: Maxi <76120220+MaxiLein@users.noreply.github.com> Date: Thu, 21 Aug 2025 18:01:47 +0200 Subject: [PATCH 20/48] feat: Update model loading timeout and introduce model loading time configuration in worker manager --- src/competence-matcher/src/config.ts | 3 ++- src/competence-matcher/src/worker/worker-manager.ts | 5 +++-- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/src/competence-matcher/src/config.ts b/src/competence-matcher/src/config.ts index 1ea9a057d..a4543add8 100644 --- a/src/competence-matcher/src/config.ts +++ b/src/competence-matcher/src/config.ts @@ -28,7 +28,8 @@ export const config = { logToConsole: process.env.LOG_CONSOLE !== 'false', // Default to true unless explicitly set to false logToFile: process.env.LOG_FILE === 'true' || false, // Default to false unless explicitly set to true logPath: process.env.LOG_PATH || 'logs/', - modelLoadingTimeout: parseInt(process.env.MODEL_LOADING_TIMEOUT || '20', 10), // Timeout for model loading in seconds + modelLoadingTimeout: parseInt(process.env.MODEL_LOADING_TIMEOUT || '20', 10) * 1_000, // Waiting time, before trying to load a model again in seconds (converted to ms) + modelLoadingTime: parseInt(process.env.MODEL_LOADING_TIME || '300', 10) * 1_000, // Time to wait for model loading in seconds (converted to ms) maxWorkerRetries: parseInt(process.env.MAX_WORKER_RETRIES || '3', 10), // Maximum worker restart attempts before 
escalating to ERROR workerRetryWindow: parseInt(process.env.WORKER_RETRY_WINDOW || '300', 10) * 1_000, // Time window in seconds to reset retry count (converted to ms) maxOllamaRetries: parseInt(process.env.MAX_OLLAMA_RETRIES || '5', 10), // Maximum model pull retry attempts diff --git a/src/competence-matcher/src/worker/worker-manager.ts b/src/competence-matcher/src/worker/worker-manager.ts index 1bb122e67..7fc9f1faa 100644 --- a/src/competence-matcher/src/worker/worker-manager.ts +++ b/src/competence-matcher/src/worker/worker-manager.ts @@ -13,6 +13,7 @@ const { modelLoadingTimeout, maxWorkerRetries, workerRetryWindow, + modelLoadingTime, } = config; const logger = getLogger(); @@ -327,7 +328,7 @@ class WorkerPool { */ private performHealthCheck(worker: Worker) { const healthCheckId = `health_check_${Date.now()}_${worker.threadId}`; - const timeout = modelLoadingTimeout * 1_000; // convert to milliseconds + const timeout = modelLoadingTimeout; logger.debug( 'system', @@ -591,7 +592,7 @@ class WorkerManager { private async waitForWorkersReady(): Promise { logger.debug('system', '[WorkerManager] Waiting for worker pools to become ready...'); - const maxWaitTime = 30_000; // 30 seconds max wait + const maxWaitTime = modelLoadingTime; // 30 seconds max wait const checkInterval = 500; // Check every 500ms const startTime = Date.now(); From cdf7fb95e605ab81c15baba8db0aa8e6524d8521 Mon Sep 17 00:00:00 2001 From: Maxi <76120220+MaxiLein@users.noreply.github.com> Date: Thu, 21 Aug 2025 18:32:47 +0200 Subject: [PATCH 21/48] refactor: Remove unnecessary model availability checks in main function; update logger initialisation comments in worker files --- src/competence-matcher/src/server.ts | 7 ++++--- src/competence-matcher/src/worker/embedder.ts | 3 ++- src/competence-matcher/src/worker/matcher.ts | 9 +++++---- src/competence-matcher/src/worker/worker-manager.ts | 2 +- 4 files changed, 12 insertions(+), 9 deletions(-) diff --git a/src/competence-matcher/src/server.ts 
b/src/competence-matcher/src/server.ts index 1451fb25d..dd663bab9 100644 --- a/src/competence-matcher/src/server.ts +++ b/src/competence-matcher/src/server.ts @@ -38,9 +38,10 @@ async function main() { logger.info('server', 'Initialising competence matcher service...'); // Ensure all required models are available - // Hugging Face models - logger.info('server', 'Checking HuggingFace models availability...'); - await ensureAllHuggingfaceModelsAreAvailable(); + + // Hugging Face models -> This should no longer be necessary as the availability check is now handled by the worker manager + // logger.info('server', 'Checking HuggingFace models availability...'); + // await ensureAllHuggingfaceModelsAreAvailable(); // Ollama models logger.info('server', 'Checking Ollama models availability...'); diff --git a/src/competence-matcher/src/worker/embedder.ts b/src/competence-matcher/src/worker/embedder.ts index 034a4d299..7312a4b4c 100644 --- a/src/competence-matcher/src/worker/embedder.ts +++ b/src/competence-matcher/src/worker/embedder.ts @@ -6,9 +6,10 @@ import { EmbeddingJob } from '../utils/types'; import { workerLogger } from '../utils/worker'; import { getLogger, createLoggerConfig, Logger } from '../utils/logger'; -// Initialise logger for this worker thread +// Initialise logger for this worker thread & Initialise Embedding model try { Logger.getInstance(createLoggerConfig()); + Embedding.getInstance(); } catch (error) { // Logger already initialised } diff --git a/src/competence-matcher/src/worker/matcher.ts b/src/competence-matcher/src/worker/matcher.ts index c3f40c005..59700dccb 100644 --- a/src/competence-matcher/src/worker/matcher.ts +++ b/src/competence-matcher/src/worker/matcher.ts @@ -6,9 +6,11 @@ import { Match, MatchingJob } from '../utils/types'; import ZeroShot from '../tasks/semantic-zeroshot'; import { Logger, createLoggerConfig } from '../utils/logger'; -// Initialise logger for this worker thread +// Initialise logger for this worker thread & 
Initialise ZeroShot model try { Logger.getInstance(createLoggerConfig()); + Embedding.getInstance(); + ZeroShot.getInstance(); } catch (error) { // Logger already initialised } @@ -66,8 +68,7 @@ parentPort.on('message', async (message: any) => { try { // Generate embedding for the task description - // Note: This could potentially be optimised by having the embedder worker handle this - // and passing the embedding directly, but for now we keep the same approach + // Todo: Handle embedding via the dedicated embedding worker const [vector] = await Embedding.embed(description); // Search for matches in the competence database @@ -98,8 +99,8 @@ parentPort.on('message', async (message: any) => { ); if (scalingClassification) { - // @ts-ignore - ZeroShot classification result structure if ( + // @ts-ignore - ZeroShot classification result structure scalingClassification.labels[0] === scalingLabels[2] && // @ts-ignore scalingClassification.scores[0] > 0.65 diff --git a/src/competence-matcher/src/worker/worker-manager.ts b/src/competence-matcher/src/worker/worker-manager.ts index 7fc9f1faa..66f1749f7 100644 --- a/src/competence-matcher/src/worker/worker-manager.ts +++ b/src/competence-matcher/src/worker/worker-manager.ts @@ -592,7 +592,7 @@ class WorkerManager { private async waitForWorkersReady(): Promise { logger.debug('system', '[WorkerManager] Waiting for worker pools to become ready...'); - const maxWaitTime = modelLoadingTime; // 30 seconds max wait + const maxWaitTime = modelLoadingTime; const checkInterval = 500; // Check every 500ms const startTime = Date.now(); From 9540f327146acac18b812a401db27c030b0263cb Mon Sep 17 00:00:00 2001 From: Maxi <76120220+MaxiLein@users.noreply.github.com> Date: Tue, 16 Sep 2025 14:17:21 +0200 Subject: [PATCH 22/48] Enhance worker management and logging: - Introduced health check and job timeout mechanisms for worker pools. - Added real IP extraction in request logger. - Improved model initialisation handling in worker threads. 
- Updated logger to manage log rotation and cleanup. - Refactored model instance management for memory efficiency. --- src/competence-matcher/src/config.ts | 10 +- .../src/middleware/request-logger.ts | 36 +++- src/competence-matcher/src/server.ts | 8 +- .../src/utils/huggingface.ts | 12 ++ src/competence-matcher/src/utils/logger.ts | 60 ++++-- src/competence-matcher/src/utils/model.ts | 5 + src/competence-matcher/src/utils/worker.ts | 186 ++-------------- src/competence-matcher/src/worker/embedder.ts | 78 ++++--- src/competence-matcher/src/worker/matcher.ts | 127 ++++++++--- .../src/worker/worker-manager.ts | 198 ++++++++++++++++-- 10 files changed, 455 insertions(+), 265 deletions(-) diff --git a/src/competence-matcher/src/config.ts b/src/competence-matcher/src/config.ts index a4543add8..4d3d68e34 100644 --- a/src/competence-matcher/src/config.ts +++ b/src/competence-matcher/src/config.ts @@ -28,8 +28,14 @@ export const config = { logToConsole: process.env.LOG_CONSOLE !== 'false', // Default to true unless explicitly set to false logToFile: process.env.LOG_FILE === 'true' || false, // Default to false unless explicitly set to true logPath: process.env.LOG_PATH || 'logs/', - modelLoadingTimeout: parseInt(process.env.MODEL_LOADING_TIMEOUT || '20', 10) * 1_000, // Waiting time, before trying to load a model again in seconds (converted to ms) - modelLoadingTime: parseInt(process.env.MODEL_LOADING_TIME || '300', 10) * 1_000, // Time to wait for model loading in seconds (converted to ms) + workerHealthCheckTimeout: + parseInt( + process.env.WORKER_HEALTH_CHECK_TIMEOUT || process.env.MODEL_LOADING_TIMEOUT || '20', + 10, + ) * 1_000, // Maximum time to wait for individual worker health check response (seconds to ms) + systemStartupTimeout: + parseInt(process.env.SYSTEM_STARTUP_TIMEOUT || process.env.MODEL_LOADING_TIME || '300', 10) * + 1_000, // Maximum time to wait for all worker pools to become ready at startup (seconds to ms) maxWorkerRetries: 
parseInt(process.env.MAX_WORKER_RETRIES || '3', 10), // Maximum worker restart attempts before escalating to ERROR workerRetryWindow: parseInt(process.env.WORKER_RETRY_WINDOW || '300', 10) * 1_000, // Time window in seconds to reset retry count (converted to ms) maxOllamaRetries: parseInt(process.env.MAX_OLLAMA_RETRIES || '5', 10), // Maximum model pull retry attempts diff --git a/src/competence-matcher/src/middleware/request-logger.ts b/src/competence-matcher/src/middleware/request-logger.ts index 923bd9d3e..31539d585 100644 --- a/src/competence-matcher/src/middleware/request-logger.ts +++ b/src/competence-matcher/src/middleware/request-logger.ts @@ -13,6 +13,34 @@ declare global { } } +/** + * Extract real IP address from request, considering proxy headers + */ +function getRealIP(req: Request): string { + // Check for common proxy headers in order of preference + const forwardedFor = req.headers['x-forwarded-for']; + const realIP = req.headers['x-real-ip']; + const clientIP = req.headers['x-client-ip']; + + // x-forwarded-for can be a comma-separated list, take the first (original) IP + if (forwardedFor) { + const ips = Array.isArray(forwardedFor) ? forwardedFor[0] : forwardedFor; + return ips.split(',')[0].trim(); + } + + // Single IP headers + if (realIP) { + return Array.isArray(realIP) ? realIP[0] : realIP; + } + + if (clientIP) { + return Array.isArray(clientIP) ? 
clientIP[0] : clientIP; + } + + // Fallback to express's req.ip (which might be the proxy IP) + return req.ip || 'unknown'; +} + /** * Request logger middleware */ @@ -25,15 +53,20 @@ export function requestLogger(req: Request, res: Response, next: NextFunction): req.startTime = startTime; // Log incoming request + const realIP = getRealIP(req); logger.debug( 'request', `Incoming ${req.method} ${req.path}`, { query: req.query, params: req.params, - ip: req.ip, + ip: realIP, + proxyIP: req.ip, // Keep original for debugging userAgent: req.headers['user-agent'], contentType: req.headers['content-type'], + // Include proxy headers for debugging if they exist + ...(req.headers['x-forwarded-for'] && { 'x-forwarded-for': req.headers['x-forwarded-for'] }), + ...(req.headers['x-real-ip'] && { 'x-real-ip': req.headers['x-real-ip'] }), }, requestId, ); @@ -46,6 +79,7 @@ export function requestLogger(req: Request, res: Response, next: NextFunction): // Log the request completion logger.request(req.method, req.path, res.statusCode, responseTime, requestId, { + ip: getRealIP(req), contentLength: res.get('content-length'), responseSize: body ? 
Buffer.byteLength(body, 'utf8') : 0, }); diff --git a/src/competence-matcher/src/server.ts b/src/competence-matcher/src/server.ts index dd663bab9..5beefa6e4 100644 --- a/src/competence-matcher/src/server.ts +++ b/src/competence-matcher/src/server.ts @@ -39,9 +39,9 @@ async function main() { // Ensure all required models are available - // Hugging Face models -> This should no longer be necessary as the availability check is now handled by the worker manager - // logger.info('server', 'Checking HuggingFace models availability...'); - // await ensureAllHuggingfaceModelsAreAvailable(); + // Hugging Face models + logger.info('server', 'Checking HuggingFace models availability...'); + await ensureAllHuggingfaceModelsAreAvailable(); // Ollama models logger.info('server', 'Checking Ollama models availability...'); @@ -69,7 +69,7 @@ async function main() { ); logger.error('server', 'Failed to start due to initialisation error', initError); - process.exit(1); + throw initError; // Rethrow to be caught by outer catch } // Parse JSON diff --git a/src/competence-matcher/src/utils/huggingface.ts b/src/competence-matcher/src/utils/huggingface.ts index 0f76908dd..80125f01f 100644 --- a/src/competence-matcher/src/utils/huggingface.ts +++ b/src/competence-matcher/src/utils/huggingface.ts @@ -5,6 +5,14 @@ import { getLogger } from './logger'; const logger = getLogger(); +/** + * Ensures that all required Hugging Face models are available by attempting to load them. + * If any model fails to load, an error is thrown. + * + * As this is meant to be used in the main thread, it will trigger the download and caching of models if not already present. + * Since the models inference, however, is meant to be run in worker threads, the model are loaded into ram in the main thread redundantly. + * To mitigate this, the model instances are deleted after the check, so they can be reloaded in the worker threads when needed. 
+ */ export async function ensureAllHuggingfaceModelsAreAvailable() { logger.debug('model', 'Checking availability of required models...'); @@ -16,6 +24,10 @@ export async function ensureAllHuggingfaceModelsAreAvailable() { await ZeroShotSemanticOpposites.getInstance(); logger.modelInfo('All HuggingFace models initialised successfully'); + + // Delete instances to free up memory as they will be reloaded in worker threads + Embedding.deleteInstance(); + ZeroShotSemanticOpposites.deleteInstance(); } catch (error) { throw new HuggingFaceModelError( 'unknown', // We don't know which specific model failed - will maybe add later diff --git a/src/competence-matcher/src/utils/logger.ts b/src/competence-matcher/src/utils/logger.ts index 9c3d3265c..598d9b019 100644 --- a/src/competence-matcher/src/utils/logger.ts +++ b/src/competence-matcher/src/utils/logger.ts @@ -4,9 +4,6 @@ import { randomUUID } from 'node:crypto'; import { config } from '../config'; import { LogLevel, LogType, LogEntry, LoggerConfig } from './types'; -// Re-export types for convenience -export { LogLevel, LogType, LogEntry, LoggerConfig } from './types'; - export function createLoggerConfig(): LoggerConfig { // Parse enabled log types from config const enabledTypes = config.logTypes @@ -63,6 +60,7 @@ export class Logger { private static instance: Logger; private config: LoggerConfig; private logStreams: Map = new Map(); + private currentLogDate: string = ''; // Track current log date private constructor(config: LoggerConfig) { this.config = config; @@ -83,26 +81,61 @@ export class Logger { if (!this.config.enableFile) return; const today = new Date().toISOString().split('T')[0]; - const dayLogDir = path.join(this.config.logDir, today); + this.currentLogDate = today; + + this.createLogStreamsForDate(today); + } + + /** + * Create log streams for a specific date + */ + private createLogStreamsForDate(date: string): void { + const dayLogDir = path.join(this.config.logDir, date); // Ensure date-specific log 
directory exists if (!fs.existsSync(dayLogDir)) { fs.mkdirSync(dayLogDir, { recursive: true }); } + // Close existing streams if they exist + this.closeLogStreams(); + // Main log file (all logs) - structured JSON const mainLogPath = path.join(dayLogDir, 'competence-matcher.json'); this.logStreams.set('main', fs.createWriteStream(mainLogPath, { flags: 'a' })); - // Error-only log file - human readable + // Error-only log file const errorLogPath = path.join(dayLogDir, 'errors.log'); this.logStreams.set('error', fs.createWriteStream(errorLogPath, { flags: 'a' })); - // Request-only log file - human readable + // Request-only log file const requestLogPath = path.join(dayLogDir, 'requests.log'); this.logStreams.set('request', fs.createWriteStream(requestLogPath, { flags: 'a' })); } + /** + * Check if we need to rotate logs to a new date and do so if necessary + */ + private checkAndRotateLogsIfNeeded(): void { + const today = new Date().toISOString().split('T')[0]; + + if (today !== this.currentLogDate) { + // Date has changed, rotate to new log files + this.currentLogDate = today; + this.createLogStreamsForDate(today); + } + } + + /** + * Close all log streams + */ + private closeLogStreams(): void { + for (const stream of this.logStreams.values()) { + stream.end(); + } + this.logStreams.clear(); + } + private shouldLog(level: LogLevel, type: LogType): boolean { return level >= this.config.level && this.config.enabledTypes.includes(type); } @@ -166,13 +199,16 @@ export class Logger { private writeToFile(entry: LogEntry): void { if (!this.config.enableFile) return; - // Write to main log file (structured JSON) + // Check if we need to rotate logs for a new date + this.checkAndRotateLogsIfNeeded(); + + // Write to main log file (structured JSON) - EXCEPT for requests const mainStream = this.logStreams.get('main'); - if (mainStream) { + if (mainStream && entry.type !== 'request') { mainStream.write(JSON.stringify(entry) + '\n'); } - // Write to error log file (human 
readable) + // Write to error log file if (entry.level === LogLevel.ERROR) { const errorStream = this.logStreams.get('error'); if (errorStream) { @@ -295,10 +331,7 @@ export class Logger { } public close(): void { - for (const stream of this.logStreams.values()) { - stream.end(); - } - this.logStreams.clear(); + this.closeLogStreams(); } // Update configuration @@ -307,7 +340,6 @@ export class Logger { // Reinitialise streams if file logging config changed if (newConfig.enableFile !== undefined || newConfig.logDir) { - this.close(); this.initialiseLogStreams(); } } diff --git a/src/competence-matcher/src/utils/model.ts b/src/competence-matcher/src/utils/model.ts index d7979a24f..b0320beb2 100644 --- a/src/competence-matcher/src/utils/model.ts +++ b/src/competence-matcher/src/utils/model.ts @@ -62,4 +62,9 @@ export abstract class TransformerPipeline { return this.instance; } + + public static deleteInstance() { + this.instance = null; + this.loaded = false; + } } diff --git a/src/competence-matcher/src/utils/worker.ts b/src/competence-matcher/src/utils/worker.ts index 2d3f93dd6..a53e4f147 100644 --- a/src/competence-matcher/src/utils/worker.ts +++ b/src/competence-matcher/src/utils/worker.ts @@ -4,26 +4,8 @@ import { Worker, parentPort } from 'worker_threads'; import VectorDataBase from '../db/db'; import { getDB } from './db'; import { config } from '../config'; -import { getLogger, createLoggerConfig } from './logger'; -const { maxJobTime } = config; - -// Initialise logger for worker if not already done -let logger: ReturnType | null = null; - -function ensureLogger() { - if (!logger) { - try { - logger = getLogger(); - } catch (error) { - // Logger not initialised yet, initialise it - const { getLogger: initLogger } = require('./logger'); - initLogger(createLoggerConfig()); - logger = getLogger(); - } - } - return logger; -} +const {} = config; export function createWorker(filename: string): Worker { const tsPath = path.resolve(__dirname, 
`../worker/${filename}.ts`); @@ -51,11 +33,6 @@ export async function withJobUpdates( }, ) { const db = getDB(job.dbName); - let exitCode = 0; // success by default - let maxTimeCheck = setTimeout(() => { - // if not completed by then, timeout - process.exit(2); - }, maxJobTime); try { if (options && options.onStart) { options.onStart(); @@ -73,154 +50,30 @@ export async function withJobUpdates( parentPort!.postMessage({ type: 'status', jobId: job.jobId, status: 'completed' }); } } catch (err) { + const error = err instanceof Error ? err : new Error(String(err)); + if (options && options.onError) { - options.onError(err as Error); + options.onError(error); } else { - exitCode = 1; // indicate failure + // Update job status in database + db.updateJobStatus(job.jobId, 'failed'); + + // Send error message to parent thread parentPort!.postMessage({ type: 'error', jobId: job.jobId, - error: err instanceof Error ? err.message : String(err), + error: error.message, }); - db.updateJobStatus(job.jobId, 'failed'); } + + // Always re-throw the error so the worker can handle it appropriately + throw error; } finally { - clearTimeout(maxTimeCheck); db.close(); // Don't close parentPort or exit process for static worker pools // Workers need to stay alive to process more jobs } -} - -export function log(...args: any[]) { - parentPort?.postMessage({ type: 'log', message: args.map(String).join(' ') }); -} - -/** - * Context manager for propagating request IDs through worker threads - */ -export class WorkerContext { - private static contexts: Map = new Map(); // jobId -> requestId - - /** - * Set context for a job - */ - static setContext(jobId: string, requestId: string): void { - this.contexts.set(jobId, requestId); - ensureLogger().debug('worker', `Context set for job ${jobId}`, { requestId }); - } - - /** - * Get context for a job - */ - static getContext(jobId: string): string | undefined { - return this.contexts.get(jobId); - } - - /** - * Remove context for a completed job 
- */ - static clearContext(jobId: string): void { - const removed = this.contexts.delete(jobId); - if (removed) { - ensureLogger().debug('worker', `Context cleared for job ${jobId}`); - } - } - - /** - * Log worker activity with proper context - */ - static logWorker( - level: 'debug' | 'info' | 'warn' | 'error', - jobId: string, - message: string, - data?: any, - error?: Error, - ): void { - const requestId = this.getContext(jobId); - - switch (level) { - case 'debug': - ensureLogger().worker(message, { jobId, ...data }, requestId); - break; - case 'info': - ensureLogger().workerInfo(message, { jobId, ...data }, requestId); - break; - case 'warn': - ensureLogger().warn('worker', message, { jobId, ...data }, requestId); - break; - case 'error': - ensureLogger().workerError(message, error, { jobId, ...data }, requestId); - break; - } - } - - /** - * Worker creation with context support - */ - static createWorkerWithContext( - filename: string, - jobId: string, - requestId: string, - workerData?: any, - ): Worker { - this.setContext(jobId, requestId); - - const worker = createWorker(filename); - - // Log worker lifecycle events - worker.on('online', () => { - this.logWorker('debug', jobId, `Worker ${worker.threadId} started`); - }); - - worker.on('exit', (code) => { - this.logWorker('debug', jobId, `Worker ${worker.threadId} exited with code ${code}`); - this.clearContext(jobId); - }); - - worker.on('error', (error) => { - this.logWorker('error', jobId, `Worker ${worker.threadId} error`, undefined, error); - this.clearContext(jobId); - }); - - return worker; - } -} - -/** - * Worker message handling with logging - */ -export function handleWorkerMessage( - worker: Worker, - jobId: string, - onMessage: (message: any) => void, - onError?: (error: Error) => void, -): void { - worker.on('message', (message) => { - WorkerContext.logWorker('debug', jobId, 'Received message from worker', { - threadId: worker.threadId, - messageType: message.type || 'unknown', - }); - 
onMessage(message); - }); - - worker.on('error', (error) => { - WorkerContext.logWorker( - 'error', - jobId, - 'Worker error occurred', - { - threadId: worker.threadId, - }, - error, - ); - if (onError) { - onError(error); - } - }); -} - -/** +} /** * Utility to log from within worker threads * This should be used in worker files to maintain context */ @@ -250,17 +103,8 @@ export function workerLogger( threadId: process.env.WORKER_THREAD_ID || 'unknown', }; - // If we're in a worker thread, post message to parent - if (typeof process !== 'undefined' && process.send) { - process.send(logData); - } // For Node.js worker threads, we use parentPort - try { - const { parentPort } = require('worker_threads'); - if (parentPort) { - parentPort.postMessage(logData); - } - } catch { - // Fallback - not in worker thread context + if (parentPort) { + parentPort.postMessage(logData); } } diff --git a/src/competence-matcher/src/worker/embedder.ts b/src/competence-matcher/src/worker/embedder.ts index 7312a4b4c..83543147d 100644 --- a/src/competence-matcher/src/worker/embedder.ts +++ b/src/competence-matcher/src/worker/embedder.ts @@ -1,18 +1,15 @@ -import { parentPort, threadId, workerData } from 'worker_threads'; +import { parentPort, threadId } from 'worker_threads'; import Embedding from '../tasks/embedding'; import { splitSemantically } from '../tasks/semantic-split'; -import { withJobUpdates } from '../utils/worker'; +import { withJobUpdates, workerLogger } from '../utils/worker'; import { EmbeddingJob } from '../utils/types'; -import { workerLogger } from '../utils/worker'; -import { getLogger, createLoggerConfig, Logger } from '../utils/logger'; - -// Initialise logger for this worker thread & Initialise Embedding model -try { - Logger.getInstance(createLoggerConfig()); - Embedding.getInstance(); -} catch (error) { - // Logger already initialised -} + +// // Initialise embedding model on startup +// try { +// Embedding.getInstance(); +// } catch (error) { +// // Model 
already initialised +// } /** * New embedder worker that stays alive and processes jobs sequentially @@ -21,6 +18,18 @@ if (!parentPort) { throw new Error('This file must be run as a Worker thread'); } +let modelsInitialised = false; +async function ensureModelsInitialised() { + if (modelsInitialised) return; + try { + await Embedding.getInstance(); + modelsInitialised = true; + workerLogger('system', 'info', 'Embedder worker model initialized', { threadId }); + } catch (err) { + throw err; + } +} + // Set up health check handler immediately, before any heavy initialisation parentPort.on('message', async (message: any) => { // Handle health checks with highest priority @@ -47,8 +56,27 @@ parentPort.on('message', async (message: any) => { // Handle job messages const job = message as EmbeddingJob; - // Set global job context for logging - (global as any).CURRENT_JOB = job.jobId; + // ensure models are initialized (but do not run this for health_check) + try { + await ensureModelsInitialised(); + } catch (err) { + workerLogger( + job.jobId || 'system', + 'error', + 'Failed to initialise models', + { threadId }, + err instanceof Error ? err : new Error(String(err)), + ); + // Notify parent and exit or mark job failed + parentPort!.postMessage({ + type: 'error', + jobId: job.jobId, + error: `Model initialisation failed: ${err instanceof Error ? err.message : String(err)}`, + }); + // still send job_completed so worker pool can continue + parentPort!.postMessage({ type: 'job_completed', jobId: job.jobId }); + return; + } workerLogger(job.jobId, 'info', `Starting embedding job with ${job.tasks.length} tasks`, { threadId, @@ -98,6 +126,8 @@ parentPort.on('message', async (message: any) => { error instanceof Error ? 
error : new Error(String(error)), ); + // Individual task errors don't fail the entire job + // Send error notification but continue processing parentPort!.postMessage({ type: 'error', jobId, @@ -107,18 +137,14 @@ parentPort.on('message', async (message: any) => { } }); - // Notify that job is completed - parentPort!.postMessage({ - type: 'job_completed', - jobId: job.jobId, - }); - + // Job completed successfully workerLogger(job.jobId, 'info', `Embedding job completed`, { threadId, taskCount: job.tasks.length, }); } catch (error) { - // Handle job-level errors + // Job-level error - already handled by withJobUpdates + // Just log it for worker context workerLogger( job.jobId, 'error', @@ -128,14 +154,8 @@ parentPort.on('message', async (message: any) => { }, error instanceof Error ? error : new Error(String(error)), ); - - parentPort!.postMessage({ - type: 'error', - jobId: job.jobId, - error: `Job failed: ${error instanceof Error ? error.message : String(error)}`, - }); - - // Still notify completion so the worker can move to next job + } finally { + // Always notify job completion so worker can process next job parentPort!.postMessage({ type: 'job_completed', jobId: job.jobId, diff --git a/src/competence-matcher/src/worker/matcher.ts b/src/competence-matcher/src/worker/matcher.ts index 59700dccb..2d9d95baf 100644 --- a/src/competence-matcher/src/worker/matcher.ts +++ b/src/competence-matcher/src/worker/matcher.ts @@ -1,21 +1,17 @@ import { parentPort, threadId } from 'worker_threads'; import Embedding from '../tasks/embedding'; -import { withJobUpdates } from '../utils/worker'; +import { withJobUpdates, workerLogger } from '../utils/worker'; import { addReason } from '../tasks/reason'; import { Match, MatchingJob } from '../utils/types'; import ZeroShot from '../tasks/semantic-zeroshot'; -import { Logger, createLoggerConfig } from '../utils/logger'; - -// Initialise logger for this worker thread & Initialise ZeroShot model -try { - 
Logger.getInstance(createLoggerConfig()); - Embedding.getInstance(); - ZeroShot.getInstance(); -} catch (error) { - // Logger already initialised -} -// Note: Verbose logging has been replaced with the new logger system +// // Initialise models on startup +// try { +// Embedding.getInstance(); +// ZeroShot.getInstance(); +// } catch (error) { +// // Models already initialised +// } /** * New matcher worker that stays alive and processes jobs sequentially @@ -24,10 +20,29 @@ if (!parentPort) { throw new Error('This file must be run as a Worker thread'); } +let modelsInitialised = false; +async function ensureModelsInitialised() { + if (modelsInitialised) return; + try { + await Embedding.getInstance(); + await ZeroShot.getInstance(); + modelsInitialised = true; + workerLogger('system', 'info', 'Matcher worker models initialized', { threadId }); + } catch (err) { + // Bubble up so job handling can report the error + throw err; + } +} + // Set up health check handler immediately, before any heavy initialisation parentPort.on('message', async (message: any) => { // Handle health checks with highest priority if (message?.type === 'health_check') { + workerLogger('system', 'debug', `Health check received: ${message.checkId}`, { + threadId, + checkId: message.checkId, + }); + parentPort!.postMessage({ type: 'health_check_response', checkId: message.checkId, @@ -36,14 +51,40 @@ parentPort.on('message', async (message: any) => { threadId: threadId, }); + workerLogger('system', 'debug', `Health check response sent: ${message.checkId}`, { + threadId, + }); return; } // Handle job messages const job = message as MatchingJob; - // Set global job context for logging - (global as any).CURRENT_JOB = job.jobId; + try { + await ensureModelsInitialised(); + } catch (err) { + workerLogger( + job.jobId || 'system', + 'error', + 'Failed to initialise models', + { threadId }, + err instanceof Error ? 
err : new Error(String(err)), + ); + // Notify parent and exit or mark job failed + parentPort!.postMessage({ + type: 'error', + jobId: job.jobId, + error: `Model initialisation failed: ${err instanceof Error ? err.message : String(err)}`, + }); + // still send job_completed so worker pool can continue + parentPort!.postMessage({ type: 'job_completed', jobId: job.jobId }); + return; + } + + workerLogger(job.jobId, 'info', `Starting matching job with ${job.tasks.length} tasks`, { + threadId, + taskCount: job.tasks.length, + }); try { // Store match results for reasoning workaround @@ -132,6 +173,19 @@ parentPort.on('message', async (message: any) => { }); } catch (error) { // Log error for individual match processing but continue + workerLogger( + jobId, + 'error', + `Failed to process match for task ${taskId}`, + { + threadId, + taskId, + competenceId: match.competenceId, + }, + error instanceof Error ? error : new Error(String(error)), + ); + + // Individual match errors don't fail the entire job parentPort!.postMessage({ type: 'error', jobId, @@ -141,6 +195,18 @@ parentPort.on('message', async (message: any) => { } } catch (error) { // Log error for task processing but continue with other tasks + workerLogger( + jobId, + 'error', + `Failed to process task ${taskId}`, + { + threadId, + taskId, + }, + error instanceof Error ? error : new Error(String(error)), + ); + + // Individual task errors don't fail the entire job parentPort!.postMessage({ type: 'error', jobId, @@ -161,23 +227,32 @@ parentPort.on('message', async (message: any) => { }, ); - // Notify that job is completed - parentPort!.postMessage({ - type: 'job_completed', - jobId: job.jobId, + // Job completed successfully + workerLogger(job.jobId, 'info', `Matching job completed`, { + threadId, + taskCount: job.tasks.length, }); } catch (error) { - // Handle job-level errors - parentPort!.postMessage({ - type: 'error', - jobId: job.jobId, - error: `Job failed: ${error instanceof Error ? 
error.message : String(error)}`, - }); - - // Still notify completion so the worker can move to next job + // Job-level error - already handled by withJobUpdates + // Just log it for worker context + workerLogger( + job.jobId, + 'error', + `Matching job failed`, + { + threadId, + }, + error instanceof Error ? error : new Error(String(error)), + ); + } finally { + // Always notify job completion so worker can process next job parentPort!.postMessage({ type: 'job_completed', jobId: job.jobId, }); } }); + +workerLogger('system', 'info', `Matcher worker thread ready`, { + threadId, +}); diff --git a/src/competence-matcher/src/worker/worker-manager.ts b/src/competence-matcher/src/worker/worker-manager.ts index 66f1749f7..7406b3741 100644 --- a/src/competence-matcher/src/worker/worker-manager.ts +++ b/src/competence-matcher/src/worker/worker-manager.ts @@ -10,10 +10,11 @@ import { getDB } from '../utils/db'; const { embeddingWorkers, matchingWorkers, - modelLoadingTimeout, + workerHealthCheckTimeout, maxWorkerRetries, workerRetryWindow, - modelLoadingTime, + systemStartupTimeout, + maxJobTime, } = config; const logger = getLogger(); @@ -38,12 +39,17 @@ class WorkerPool { private busyWorkers: Map = new Map(); // Maps worker to current jobId private pendingHealthChecks: Map = new Map(); // Track pending health checks private beingReplaced: Set = new Set(); // Track workers being replaced to prevent double replacement + private jobTimeouts: Map = new Map(); // Track job timeouts // Retry tracking private failureCount: number = 0; // Total failures for this worker type private lastFailureTime: number = 0; // Timestamp of last failure private consecutiveFailures: number = 0; // Consecutive failures without recovery + // Health check failure tracking + private consecutiveHealthCheckFailures: number = 0; // Consecutive health check failures + private poolBroken: boolean = false; // Pool marked as broken due to persistent failures + constructor(workerType: workerTypes, poolSize: 
number) { this.workerType = workerType; this.poolSize = poolSize; @@ -65,6 +71,15 @@ class WorkerPool { * Create a new worker and register it in the pool */ private createAndRegisterWorker() { + // Don't create new workers if pool is broken + if (this.poolBroken) { + logger.debug( + 'system', + `[WorkerPool] Not creating new ${this.workerType} worker - pool is marked as broken`, + ); + return; + } + // Check if we already have enough workers (including those pending health checks) if (this.workers.length >= this.poolSize) { logger.debug( @@ -204,6 +219,12 @@ class WorkerPool { `[WorkerPool] Assigning ${this.workerType} job ${job.jobId} to worker ${worker.threadId}`, ); + // Set up job timeout + const jobTimeout = setTimeout(() => { + this.handleJobTimeout(worker, job); + }, maxJobTime); + this.jobTimeouts.set(worker, jobTimeout); + // Set up message handling for this specific job const messageHandler = (message: any) => { try { @@ -259,6 +280,9 @@ class WorkerPool { } break; case 'job_completed': + // Clear job timeout since job completed + this.clearJobTimeout(worker); + // Job is done, mark worker as available and process next job this.markWorkerAvailable(worker); this.processNextJob(); @@ -303,6 +327,9 @@ class WorkerPool { worker.postMessage(job); options?.onOnline?.(job); } catch (error) { + // Clear timeout since job failed to start + this.clearJobTimeout(worker); + const messageError = new WorkerError( this.workerType, job.jobId, @@ -323,12 +350,79 @@ class WorkerPool { } } + /** + * Handle job timeout for a worker + */ + private handleJobTimeout(worker: Worker, job: EmbeddingJob | MatchingJob) { + const jobId = job.jobId; + + logger.workerError( + 'job_timeout', + new WorkerError(this.workerType, jobId, new Error(`Job timed out after ${maxJobTime}ms`)), + { + workerType: this.workerType, + threadId: worker.threadId, + jobId, + timeout: maxJobTime, + }, + ); + + // Clear the timeout from tracking + this.clearJobTimeout(worker); + + // Update job status in 
database + try { + const db = getDB(job.dbName); + db.updateJobStatus(jobId, 'failed'); + db.close(); + } catch (error) { + logger.error( + 'system', + `Failed to update job status for timed out job ${jobId}`, + error instanceof Error ? error : new Error(String(error)), + ); + } + + // Mark worker as being replaced to prevent double replacement + this.beingReplaced.add(worker); + + // Terminate the unresponsive worker + try { + worker.terminate(); + logger.debug( + 'system', + `[WorkerPool] Terminated ${this.workerType} worker ${worker.threadId} due to job timeout`, + ); + } catch (error) { + logger.debug( + 'system', + `[WorkerPool] Failed to terminate timed-out worker ${worker.threadId}:`, + error, + ); + } + + // Remove worker and create replacement + this.removeWorkerFromPool(worker); + this.createAndRegisterWorker(); + } + + /** + * Clear job timeout for a worker + */ + private clearJobTimeout(worker: Worker) { + const timeout = this.jobTimeouts.get(worker); + if (timeout) { + clearTimeout(timeout); + this.jobTimeouts.delete(worker); + } + } + /** * Perform health check on a worker to ensure it's responsive */ private performHealthCheck(worker: Worker) { const healthCheckId = `health_check_${Date.now()}_${worker.threadId}`; - const timeout = modelLoadingTimeout; + const timeout = workerHealthCheckTimeout; logger.debug( 'system', @@ -337,8 +431,8 @@ class WorkerPool { // Set up timeout for health check const healthCheckTimeout = setTimeout(() => { - // Track failure and determine logging severity - const { shouldLogAsError, retryCount } = this.trackWorkerFailure(); + // Track health check failure and check if pool should be marked as broken + const poolBroken = this.trackHealthCheckFailure(); const error = new WorkerError( this.workerType, @@ -349,16 +443,21 @@ class WorkerPool { workerType: this.workerType, threadId: worker.threadId, timeout, - retryCount, - maxRetries: maxWorkerRetries, + consecutiveHealthCheckFailures: this.consecutiveHealthCheckFailures, + 
maxHealthCheckFailures: 5, }; - if (shouldLogAsError) { - logger.workerError('worker_health_check_timeout', error, errorData); + if (poolBroken) { + logger.error( + 'worker', + `Worker health check timeout - pool marked as broken`, + undefined, + errorData, + ); } else { logger.warn( 'worker', - `Worker health check timeout (attempt ${retryCount}/${maxWorkerRetries})`, + `Worker health check timeout (failure ${this.consecutiveHealthCheckFailures}/5)`, errorData, ); } @@ -384,9 +483,11 @@ class WorkerPool { ); } - // Remove unresponsive worker and create a replacement + // Remove unresponsive worker and create a replacement only if pool is not broken this.removeWorkerFromPool(worker); - this.createAndRegisterWorker(); + if (!poolBroken) { + this.createAndRegisterWorker(); + } }, timeout); // Store the timeout so we can clear it if worker responds @@ -409,6 +510,7 @@ class WorkerPool { // Reset failure tracking on successful recovery this.resetFailureTracking(); + this.resetHealthCheckFailures(); logger.debug( 'system', @@ -467,12 +569,15 @@ class WorkerPool { this.beingReplaced.delete(worker); // Clean up any pending health check - const timeout = this.pendingHealthChecks.get(worker); - if (timeout) { - clearTimeout(timeout); + const healthCheckTimeout = this.pendingHealthChecks.get(worker); + if (healthCheckTimeout) { + clearTimeout(healthCheckTimeout); this.pendingHealthChecks.delete(worker); } + // Clean up any job timeout + this.clearJobTimeout(worker); + const index = this.workers.indexOf(worker); if (index > -1) { this.workers.splice(index, 1); @@ -485,6 +590,45 @@ class WorkerPool { } } + /** + * Track a health check failure and determine if pool should be marked as broken + */ + private trackHealthCheckFailure(): boolean { + this.consecutiveHealthCheckFailures++; + + // Mark pool as broken after 5 consecutive health check failures + if (this.consecutiveHealthCheckFailures >= 5) { + this.poolBroken = true; + logger.error( + 'worker', + `Pool 
${this.workerType} marked as broken after ${this.consecutiveHealthCheckFailures} consecutive health check failures`, + undefined, + { + workerType: this.workerType, + consecutiveFailures: this.consecutiveHealthCheckFailures, + }, + ); + return true; + } + + return false; + } + + /** + * Reset health check failure tracking on successful health check + */ + private resetHealthCheckFailures() { + this.consecutiveHealthCheckFailures = 0; + this.poolBroken = false; + } + + /** + * Check if this pool is broken due to persistent health check failures + */ + public isBroken(): boolean { + return this.poolBroken; + } + /** * Track a worker failure and determine appropriate logging level */ @@ -523,6 +667,9 @@ class WorkerPool { this.busyWorkers.delete(worker); this.availableWorkers.add(worker); + // Clear any job timeout since job is completed + this.clearJobTimeout(worker); + // Reset failure tracking on successful job completion this.resetFailureTracking(); } @@ -538,6 +685,8 @@ class WorkerPool { busyWorkers: this.busyWorkers.size, pendingHealthChecks: this.pendingHealthChecks.size, queuedJobs: this.queue.length, + consecutiveHealthCheckFailures: this.consecutiveHealthCheckFailures, + poolBroken: this.poolBroken, }; } @@ -592,7 +741,7 @@ class WorkerManager { private async waitForWorkersReady(): Promise { logger.debug('system', '[WorkerManager] Waiting for worker pools to become ready...'); - const maxWaitTime = modelLoadingTime; + const maxWaitTime = systemStartupTimeout; const checkInterval = 500; // Check every 500ms const startTime = Date.now(); @@ -601,6 +750,20 @@ class WorkerManager { const embeddingStats = this.embeddingPool.getStats(); const matchingStats = this.matchingPool.getStats(); + // Check if any pools are broken and fail fast + if (embeddingStats.poolBroken || matchingStats.poolBroken) { + const brokenPools = []; + if (embeddingStats.poolBroken) brokenPools.push('embedding'); + if (matchingStats.poolBroken) brokenPools.push('matching'); + + reject( + 
new Error( + `Worker pools failed to initialise due to persistent health check failures: ${brokenPools.join(', ')}`, + ), + ); + return; + } + const embeddingReady = embeddingStats.availableWorkers > 0; const matchingReady = matchingStats.availableWorkers > 0; @@ -680,8 +843,7 @@ class WorkerManager { } /** - * Generic enqueue method for backward compatibility - * @deprecated Use enqueueEmbedding or enqueueMatching instead + * Generic enqueue method that routes jobs based on type */ public enqueue(job: any, workerScript: workerTypes, options?: JobQueueItem['options']) { if (workerScript === 'embedder') { From e6795392a4521cbe3d46fd3b71a9c6b6352f0192 Mon Sep 17 00:00:00 2001 From: Maxi <76120220+MaxiLein@users.noreply.github.com> Date: Wed, 17 Sep 2025 15:11:39 +0200 Subject: [PATCH 23/48] Refactor reasoning and alignment handling in matcher and prompts; improve logging and classification logic --- src/competence-matcher/src/tasks/reason.ts | 48 ++++----- src/competence-matcher/src/utils/prompts.ts | 90 ++++++++++++++++- src/competence-matcher/src/worker/matcher.ts | 100 ++++++++----------- 3 files changed, 146 insertions(+), 92 deletions(-) diff --git a/src/competence-matcher/src/tasks/reason.ts b/src/competence-matcher/src/tasks/reason.ts index 45e5f3cea..c5b9fb3d0 100644 --- a/src/competence-matcher/src/tasks/reason.ts +++ b/src/competence-matcher/src/tasks/reason.ts @@ -8,16 +8,15 @@ import { getLogger } from '../utils/logger'; const { reasonModel } = config; -function getLoggerInstance() { - return getLogger(); -} - -export async function addReason(matches: T[], targetText: string): Promise { +export async function addReason( + matches: T[], + targetText: string, +): Promise { if (matches.length === 0) { return matches; // No matches to reason about } - const logger = getLoggerInstance(); + const logger = getLogger(); logger.debug('model', `Adding reasoning to ${matches.length} matches`, { targetTextLength: targetText.length, @@ -30,7 +29,7 @@ export async 
function addReason(matches: T[], targetText: strin ...intructPrompt, { role: 'user', - content: `Task: ${targetText}\nCompetence: ${match.text}\nSimilarity Score: ${match.distance}`, + content: `Task: ${targetText}\nCompetence: ${match.text}\nSimilarity Score: ${match.distance}\nAlignment: ${match.alignment}`, }, ]; @@ -43,14 +42,10 @@ export async function addReason(matches: T[], targetText: strin // Extract the reason from the response const reason = response.message.content.trim(); - getLoggerInstance().debug( - 'model', - `Generated reasoning for match ${index + 1}/${matches.length}`, - { - matchText: match.text.substring(0, 50) + (match.text.length > 50 ? '...' : ''), - reasonLength: reason.length, - }, - ); + logger.debug('model', `Generated reasoning for match ${index + 1}/${matches.length}`, { + matchText: match.text.substring(0, 50) + (match.text.length > 50 ? '...' : ''), + reasonLength: reason.length, + }); return { ...match, @@ -62,19 +57,14 @@ export async function addReason(matches: T[], targetText: strin error instanceof Error ? error : new Error(String(error)), ); - getLoggerInstance().error( - 'model', - 'Failed to generate reasoning for match', - reasoningError, - { - matchIndex: index, - totalMatches: matches.length, - targetTextLength: targetText.length, - matchText: match.text.substring(0, 100) + (match.text.length > 100 ? '...' : ''), - similarity: match.distance, - reasonModel, - }, - ); + logger.error('model', 'Failed to generate reasoning for match', reasoningError, { + matchIndex: index, + totalMatches: matches.length, + targetTextLength: targetText.length, + matchText: match.text.substring(0, 100) + (match.text.length > 100 ? '...' 
: ''), + similarity: match.distance, + reasonModel, + }); // If there's an error, just keep the original match without a reason return match; @@ -86,7 +76,7 @@ export async function addReason(matches: T[], targetText: strin (match) => 'reason' in match && match.reason, ).length; - getLoggerInstance().debug( + logger.debug( 'model', `Reasoning completed: ${successfulReasons}/${matches.length} matches received reasons`, { diff --git a/src/competence-matcher/src/utils/prompts.ts b/src/competence-matcher/src/utils/prompts.ts index 023fe5697..ffe0de3dc 100644 --- a/src/competence-matcher/src/utils/prompts.ts +++ b/src/competence-matcher/src/utils/prompts.ts @@ -109,16 +109,77 @@ export const SEMANTIC_SPLITTER: Message[] = [ * ------------------------------------------------------------- */ +// const MATCH_REASON_INTRUCT: Message = { +// role: 'system', +// content: ` +// You are an expert in generating reasons for matching scores between tasks and competences. +// Your task is to generate a reason for the matching score between a task and a competence. +// The reason should be one to three short, concise sentence that explain why the task and competence match as well as they did or why they did not match that well. +// Do not mention the similarity score in your response. +// The reason should be based on the text of the task and the competence and their estimated normalized similarity score. +// The similarity score is a number between 0 and 1, where 0 means no similarity and 1 means perfect similarity. +// Do not mention the similarity score in your response. +// `, +// }; + +// const MATCH_REASON_EXAMPLES: Message[] = [ +// { +// role: 'user', +// content: ` +// Task: Operate CNC milling machines to produce precision metal parts. +// Competence: Experience with CNC milling machines and precision machining. 
+// Similarity Score: 0.95 +// `, +// }, +// { +// role: 'assistant', +// content: ` +// The statements match very well because the task requires operating CNC milling machines, which is exactly what the competence is about. +// `, +// }, +// { +// role: 'user', +// content: ` +// Task: Assemble circuit boards according to schematic diagrams. +// Competence: Basic knowledge of electronics and soldering skills. +// Similarity Score: 0.65 +// `, +// }, +// { +// role: 'assistant', +// content: ` +// The the statements have a moderate match because while assembling circuit boards requires some knowledge of electronics, it does not specifically require advanced soldering skills. +// `, +// }, +// { +// role: 'user', +// content: ` +// Task: Prepare raw materials for production. +// Competence: Experience with inventory management and supply chain logistics. +// Similarity Score: 0.30 +// `, +// }, +// { +// role: 'assistant', +// content: ` +// The statements have a low match because preparing raw materials is a basic task that does not require advanced inventory management or supply chain logistics skills. +// `, +// }, +// ]; + const MATCH_REASON_INTRUCT: Message = { role: 'system', content: ` - You are an expert in generating reasons for matching scores between tasks and competences. + You are an expert in generating reasons for matching scores and their alignment between tasks and competences. Your task is to generate a reason for the matching score between a task and a competence. + In addition to the score - which is the normalized similarity score between the task and competence - you also receive an alignment label which can be one of 'aligning', 'neutral' or 'contradicting'. + The alignment label indicates whether the task and competence are well aligned ('aligning'), not really related, so do not match well nor badly ('neutral') or are in conflict with each other ('contradicting'). 
The reason should be one to three short, concise sentence that explain why the task and competence match as well as they did or why they did not match that well. - Do not mention the similarity score in your response. - The reason should be based on the text of the task and the competence and their estimated normalized similarity score. + Do not mention the similarity score or alignment label in your response. + The reason should be based on the text of the task and the competence and their estimated normalized similarity score and alignment. The similarity score is a number between 0 and 1, where 0 means no similarity and 1 means perfect similarity. Do not mention the similarity score in your response. + Do not mention the alignment label in your response. `, }; @@ -129,6 +190,7 @@ const MATCH_REASON_EXAMPLES: Message[] = [ Task: Operate CNC milling machines to produce precision metal parts. Competence: Experience with CNC milling machines and precision machining. Similarity Score: 0.95 + Alignment: aligning `, }, { @@ -137,12 +199,29 @@ const MATCH_REASON_EXAMPLES: Message[] = [ The statements match very well because the task requires operating CNC milling machines, which is exactly what the competence is about. `, }, + + { + role: 'user', + content: ` + Task: Delivering packages to customers on time. Driving a delivery van safely through city traffic. Loading and unloading packages efficiently. Communicating with customers professionally. Planning optimal delivery routes using GPS technology. + Competence: Has no drivers license and cannot operate vehicles. + Similarity Score: 0.16 + Alignment: contradicting + `, + }, + { + role: 'assistant', + content: ` + The statements do not match. The task requires driving a delivery van, but the competence indicates that the person cannot operate vehicles at all. + `, + }, { role: 'user', content: ` Task: Assemble circuit boards according to schematic diagrams. 
Competence: Basic knowledge of electronics and soldering skills. Similarity Score: 0.65 + Alignment: aligning `, }, { @@ -156,13 +235,14 @@ const MATCH_REASON_EXAMPLES: Message[] = [ content: ` Task: Prepare raw materials for production. Competence: Experience with inventory management and supply chain logistics. - Similarity Score: 0.30 + Similarity Score: 0.49 + Alignment: neutral `, }, { role: 'assistant', content: ` - The statements have a low match because preparing raw materials is a basic task that does not require advanced inventory management or supply chain logistics skills. + The statements have a relativly low match because preparing raw materials is a basic task that does not require advanced inventory management or supply chain logistics skills. `, }, ]; diff --git a/src/competence-matcher/src/worker/matcher.ts b/src/competence-matcher/src/worker/matcher.ts index 2d9d95baf..287cd931e 100644 --- a/src/competence-matcher/src/worker/matcher.ts +++ b/src/competence-matcher/src/worker/matcher.ts @@ -126,72 +126,56 @@ parentPort.on('message', async (message: any) => { // Zero-shot classification for scaling scores based on alignment const scalingLabels = ['conflicting', 'neutral', 'aligning']; - const labelScalar = [0.8, 1.0, 1.2]; + const labelScalar = [0.05, 0.25, 1]; // Process each match for (const match of matches) { - try { - let flag = 'neutral'; // Default flag + let flag = 'neutral'; // Default flag - // Apply zero-shot classification - const scalingClassification = await ZeroShot.classify( - `Task: ${description} | Competence: ${match.text}`, - scalingLabels, - ); + // Apply zero-shot classification + const scalingClassification = await ZeroShot.classify( + `Task: ${description} | Competence: ${match.text}`, + scalingLabels, + ); - if (scalingClassification) { - if ( - // @ts-ignore - ZeroShot classification result structure - scalingClassification.labels[0] === scalingLabels[2] && - // @ts-ignore - scalingClassification.scores[0] > 0.65 - ) { - 
// Perfect match - keep as is - match.distance *= labelScalar[2]; - flag = 'aligning'; - } + if (scalingClassification) { + if ( // @ts-ignore - ZeroShot classification result structure - else if (scalingClassification.labels[0] === scalingLabels[1]) { - // Mediocre match - scale it down - match.distance *= labelScalar[1]; - flag = 'neutral'; - } + scalingClassification.labels[0] === scalingLabels[2] && + // @ts-ignore + scalingClassification.scores[0] > 0.65 + ) { + // Perfect match - keep as is + match.distance *= labelScalar[2]; + flag = 'aligning'; + } + // @ts-ignore - ZeroShot classification result structure + else if (scalingClassification.labels[0] === scalingLabels[1]) { + // Mediocre match - scale it down + match.distance *= labelScalar[1]; + flag = 'neutral'; + } + // @ts-ignore - ZeroShot classification result structure + else if (scalingClassification.labels[0] === scalingLabels[0]) { + // Poor match - scale it down significantly + match.distance *= labelScalar[0]; + flag = 'contradicting'; } - - // Store match result for reasoning workaround - matchResults[description].push({ - jobId, - taskId, - taskText: description, - competenceId: match.competenceId, - resourceId: match.resourceId, - text: match.text, - type: match.type as 'name' | 'description' | 'proficiencyLevel', - alignment: flag, - distance: match.distance, - reason: match.reason, - }); - } catch (error) { - // Log error for individual match processing but continue - workerLogger( - jobId, - 'error', - `Failed to process match for task ${taskId}`, - { - threadId, - taskId, - competenceId: match.competenceId, - }, - error instanceof Error ? error : new Error(String(error)), - ); - - // Individual match errors don't fail the entire job - parentPort!.postMessage({ - type: 'error', - jobId, - error: `Failed to process match for task ${taskId}: ${error instanceof Error ? 
error.message : String(error)}`, - }); } + + // Store match result for reasoning workaround + matchResults[description].push({ + jobId, + taskId, + taskText: description, + competenceId: match.competenceId, + resourceId: match.resourceId, + text: match.text, + type: match.type as 'name' | 'description' | 'proficiencyLevel', + alignment: flag, + distance: match.distance, + reason: match.reason, + }); } } catch (error) { // Log error for task processing but continue with other tasks From 64fe77fc564b0b50671878292a0b114cfef2aa3b Mon Sep 17 00:00:00 2001 From: Maxi <76120220+MaxiLein@users.noreply.github.com> Date: Mon, 29 Sep 2025 18:43:18 +0200 Subject: [PATCH 24/48] Add cross-encoder model integration and enhance reasoning logic - Introduced cross-encoder model configuration in config.ts. - Implemented CrossEncoder class for text classification. - Updated embedding and matcher tasks to initialize cross-encoder model. - Enhanced reasoning logic in matcher to utilize zero-shot classification for alignment scoring. - Refactored prompt messages for clarity and improved examples. 
--- src/competence-matcher/src/config.ts | 2 + .../src/tasks/cross-encode.ts | 76 +++ src/competence-matcher/src/tasks/embedding.ts | 5 - src/competence-matcher/src/tasks/reason.ts | 1 - .../src/tasks/semantic-split.ts | 6 +- .../src/tasks/semantic-zeroshot.ts | 91 ++- .../src/utils/huggingface.ts | 5 + src/competence-matcher/src/utils/prompts.ts | 551 +++++++++++++++++- src/competence-matcher/src/worker/embedder.ts | 4 +- src/competence-matcher/src/worker/matcher.ts | 63 +- 10 files changed, 733 insertions(+), 71 deletions(-) create mode 100644 src/competence-matcher/src/tasks/cross-encode.ts diff --git a/src/competence-matcher/src/config.ts b/src/competence-matcher/src/config.ts index 4d3d68e34..50888aecd 100644 --- a/src/competence-matcher/src/config.ts +++ b/src/competence-matcher/src/config.ts @@ -8,6 +8,8 @@ export const config = { embeddingModel: process.env.EMBEDDING_MODEL || 'onnx-community/Qwen3-Embedding-0.6B-ONNX', embeddingDim: parseInt(process.env.EMBEDDING_DIM || '1024', 10), nliModel: process.env.NLI_MODEL || 'Maxi-Lein/roberta-large-mnli-onnx', + crossEncoderModel: process.env.CROSS_ENCODER_MODEL || 'local/ms-marco-MiniLM-L6-v2-onnx', + // crossEncoderModel: process.env.CROSS_ENCODER_MODEL || 'cross-encoder/ms-marco-MiniLM-L6-v2', modelCache: process.env.MODEL_CACHE || 'src/models/', useGPU: process.env.USE_GPU === 'true' || false, port: parseInt(process.env.PORT || '8501', 10), diff --git a/src/competence-matcher/src/tasks/cross-encode.ts b/src/competence-matcher/src/tasks/cross-encode.ts new file mode 100644 index 000000000..c00439ade --- /dev/null +++ b/src/competence-matcher/src/tasks/cross-encode.ts @@ -0,0 +1,76 @@ +import { TransformerPipeline } from '../utils/model'; +import { TransformerPipelineOptions } from '../utils/types'; +import { PipelineType, TextClassificationPipeline } from '@huggingface/transformers'; +import { config } from '../config'; + +export default class CrossEncoder extends TransformerPipeline { + protected static 
override getPipelineOptions(): TransformerPipelineOptions { + return { + task: 'text-classification' as PipelineType, + model: config.crossEncoderModel, + options: { + model_file_name: 'model.onnx', + use_external_data_format: true, + local_files_only: true, + }, + }; + } + + /** + * Score an array of (task, candidateText) pairs. + * Returns an array of { index, raw, score } where `score` is normalised to [0,1]. + */ + public static async scorePairs(pairs: Array<{ task: string; comptence: string }>) { + if (!pairs.length) return []; + + const pipe = await this.getInstance(); + + // Convert objects to arrays-of-pairs + const inputs = pairs.map((p) => [p.task, p.comptence]); // -> string[][] + + // call the pipeline in a batch + // @ts-ignore + const out = await pipe(inputs); + + // out format varies by model/pipeline: often each item is { label, score } or {score} + // We'll defensively normalise results to [0,1]. + const sigmoid = (x: number) => 1 / (1 + Math.exp(-x)); + + console.log('CrossEncoder input:', inputs); + console.log('CrossEncoder output:', out); + + // return out.map((item: any, i: number) => { + // // Try common shapes: + // // - item = { label: 'LABEL_0', score: 0.72 } -> take score + // // - item = { scores: [..], labels: [..] 
} or a logits array -> handle below + // let raw: number; + + // if (typeof item === 'object' && 'score' in item && typeof item.score === 'number') { + // raw = item.score; + // } else if (Array.isArray(item) && item.length === 1 && typeof item[0].score === 'number') { + // raw = item[0].score; + // } else if (typeof item === 'object' && 'logits' in item) { + // // If logits are returned (raw model outputs), use sigmoid on the first logit + // const logits = item.logits; + // if (Array.isArray(logits)) { + // raw = sigmoid(logits[0]); + // } else { + // raw = 0; + // } + // } else if (typeof item === 'number') { + // // sometimes minimal wrappers return a numeric score + // raw = item; + // } else { + // // fallback: try to find a numeric value in the object + // const val = Object.values(item).find((v) => typeof v === 'number'); + // raw = typeof val === 'number' ? val : 0; + // } + + // // if raw is outside 0..1, apply sigmoid as fallback (common if raw is logit) + // let score = raw; + // if (score < 0 || score > 1) score = sigmoid(raw); + + // return { index: pairs[i].index, raw, score }; + // }); + } +} diff --git a/src/competence-matcher/src/tasks/embedding.ts b/src/competence-matcher/src/tasks/embedding.ts index 39356cfac..4148e6980 100644 --- a/src/competence-matcher/src/tasks/embedding.ts +++ b/src/competence-matcher/src/tasks/embedding.ts @@ -6,11 +6,6 @@ import { import { config } from '../config'; import { TransformerPipeline } from '../utils/model'; import { TransformerPipelineOptions } from '../utils/types'; -import { getLogger } from '../utils/logger'; - -function getLoggerInstance() { - return getLogger(); -} export default class Embedding extends TransformerPipeline { protected static override getPipelineOptions(): TransformerPipelineOptions { diff --git a/src/competence-matcher/src/tasks/reason.ts b/src/competence-matcher/src/tasks/reason.ts index c5b9fb3d0..11480331f 100644 --- a/src/competence-matcher/src/tasks/reason.ts +++ 
b/src/competence-matcher/src/tasks/reason.ts @@ -32,7 +32,6 @@ export async function addReason( content: `Task: ${targetText}\nCompetence: ${match.text}\nSimilarity Score: ${match.distance}\nAlignment: ${match.alignment}`, }, ]; - try { const response = await ollama.chat({ model: reasonModel, diff --git a/src/competence-matcher/src/tasks/semantic-split.ts b/src/competence-matcher/src/tasks/semantic-split.ts index a8e13626e..16dc980a3 100644 --- a/src/competence-matcher/src/tasks/semantic-split.ts +++ b/src/competence-matcher/src/tasks/semantic-split.ts @@ -13,12 +13,8 @@ const { splittingLength: MIN_TEXT_LENGTH, } = config; -function getLoggerInstance() { - return getLogger(); -} - export async function splitSemantically(tasks: EmbeddingTask[]): Promise { - const logger = getLoggerInstance(); + const logger = getLogger(); const splittedTasks: EmbeddingTask[] = []; const toSplit: { task: EmbeddingTask; messages: Message[] }[] = []; diff --git a/src/competence-matcher/src/tasks/semantic-zeroshot.ts b/src/competence-matcher/src/tasks/semantic-zeroshot.ts index 7b9ce6227..fa3454842 100644 --- a/src/competence-matcher/src/tasks/semantic-zeroshot.ts +++ b/src/competence-matcher/src/tasks/semantic-zeroshot.ts @@ -6,11 +6,8 @@ import { import { config } from '../config'; import { TransformerPipeline } from '../utils/model'; import { TransformerPipelineOptions } from '../utils/types'; -import { getLogger } from '../utils/logger'; -function getLoggerInstance() { - return getLogger(); -} +export const labels = ['entailment', 'neutral statement', 'contradiction or not related']; export default class ZeroShot extends TransformerPipeline { protected static override getPipelineOptions(): TransformerPipelineOptions { @@ -18,9 +15,6 @@ export default class ZeroShot extends TransformerPipeline { - // logger.debug("system", `Embedding progress: ${progress}`); - // }, model_file_name: 'model.onnx', use_external_data_format: true, local_files_only: true, @@ -29,10 +23,9 @@ export 
default class ZeroShot extends TransformerPipeline(); return pipe(text, _labels, { hypothesis_template }); } + + /** + * Run MNLI-style check in one direction: premise -> hypothesis sentence string. + * Returns object { entail, neutral, contradict } with values in [0,1]. + */ + private static async nliDirection(premise: string, hypothesis: string) { + const pipe = await this.getInstance(); + // Reuse zero-shot pipeline: pass premise as text and labels that (when inserted into template) + // produce a hypothesis that matches desired explicit hypothesis. + // Instead, easier: pass candidate_labels matching MNLI classes and set hypothesis_template to "{}" + const hypothesis_template = `${hypothesis}`; // pipeline will insert label into template; we want explicit hypothesis + // Some transformer-js wrappers may require candidate_labels to be "labels", and will return { labels: [...], scores: [...] } + const out = await pipe(premise, labels, { hypothesis_template }); + // out may be e.g. { labels: ['entailment','neutral','contradiction'], scores: [0.7,0.2,0.1] } + const mapping: Record = {}; + if ( + out && + typeof out === 'object' && + !Array.isArray(out) && + 'labels' in out && + 'scores' in out && + Array.isArray((out as any).labels) && + Array.isArray((out as any).scores) + ) { + (out as any).labels.forEach( + (lbl: string, i: number) => (mapping[lbl] = (out as any).scores[i]), + ); + } else if (Array.isArray(out) && out.length) { + // Some versions return array of {label, score} objects + out.forEach((item: any) => { + if (item.label && typeof item.score === 'number') mapping[item.label] = item.score; + }); + } else { + // fallback: return zeros + return { entail: 0, neutral: 0, contradict: 0 }; + } + + return { + entail: mapping[labels[0]] ?? 0, + neutral: mapping[labels[1]] ?? 0, + contradict: mapping[labels[2]] ?? 0, + }; + } + + /** + * Run NLI both directions (premise=capability -> hypothesis: task, + * and premise=task -> hypothesis: capability). 
Returns aggregated features. + */ + public static async nliBiDirectional(task: string, capability: string) { + // Build explicit hypothesis sentences + const h1 = `This is a/an {} to the task: ${task}.`; // capability as premise -> can they perform the task? + const h2 = `The described task is a/an {} to the capability: ${capability}.`; // task as premise -> does it require the capability? + + const [dir1, dir2] = await Promise.all([ + this.nliDirection(capability, h1), + this.nliDirection(task, h2), + ]); + + const result = { + entail: (dir1.entail + dir2.entail) / 2, + neutral: (dir1.neutral + dir2.neutral) / 2, + contradict: Math.max(dir1.contradict, dir2.contradict), + details: { 'competence on task': dir1, 'task on competence': dir2 }, + }; + + // Order the labels by their score in descending order + const ranking = ['entail', 'neutral', 'contradict'].sort( + // @ts-ignore + (a, b) => result[b] - result[a], + ); + + return { + ...result, + ranking, + }; + } } diff --git a/src/competence-matcher/src/utils/huggingface.ts b/src/competence-matcher/src/utils/huggingface.ts index 80125f01f..cd43026e1 100644 --- a/src/competence-matcher/src/utils/huggingface.ts +++ b/src/competence-matcher/src/utils/huggingface.ts @@ -1,3 +1,4 @@ +import CrossEncoder from '../tasks/cross-encode'; import Embedding from '../tasks/embedding'; import ZeroShotSemanticOpposites from '../tasks/semantic-zeroshot'; import { HuggingFaceModelError } from './errors'; @@ -23,11 +24,15 @@ export async function ensureAllHuggingfaceModelsAreAvailable() { logger.debug('model', 'Initialising zero-shot semantic opposites model...'); await ZeroShotSemanticOpposites.getInstance(); + logger.debug('model', 'Initialising cross-encoder model...'); + await CrossEncoder.getInstance(); + logger.modelInfo('All HuggingFace models initialised successfully'); // Delete instances to free up memory as they will be reloaded in worker threads Embedding.deleteInstance(); ZeroShotSemanticOpposites.deleteInstance(); + 
CrossEncoder.deleteInstance(); } catch (error) { throw new HuggingFaceModelError( 'unknown', // We don't know which specific model failed - will maybe add later diff --git a/src/competence-matcher/src/utils/prompts.ts b/src/competence-matcher/src/utils/prompts.ts index ffe0de3dc..df3dc5d48 100644 --- a/src/competence-matcher/src/utils/prompts.ts +++ b/src/competence-matcher/src/utils/prompts.ts @@ -167,13 +167,17 @@ export const SEMANTIC_SPLITTER: Message[] = [ // }, // ]; -const MATCH_REASON_INTRUCT: Message = { +const MATCH_REASON_INSTRUCT_OLD: Message = { role: 'system', content: ` You are an expert in generating reasons for matching scores and their alignment between tasks and competences. Your task is to generate a reason for the matching score between a task and a competence. In addition to the score - which is the normalized similarity score between the task and competence - you also receive an alignment label which can be one of 'aligning', 'neutral' or 'contradicting'. The alignment label indicates whether the task and competence are well aligned ('aligning'), not really related, so do not match well nor badly ('neutral') or are in conflict with each other ('contradicting'). + Generally speaking, a score of 0 means not suited, where not suited can either mean, not suited at all or just not really suited (i.e. the capability and task either contradict or are not overlapping in terms of competences, e.g. they are unrelated). + A score of 1 means perfectly suited (i.e. the capability fully covers the task). + Hence, everything larger than 0 already indicates some degree of suitability. + A match score of e.g. 0.15 is already slightly suited, 0.5 indicates that the resource is somewhat well suited to perform the task, 0.7 would indicate that the resource is quite well suited to perform the task, and everything above 0.85 and 1.0 means close to perfectly suited. 
The reason should be one to three short, concise sentence that explain why the task and competence match as well as they did or why they did not match that well. Do not mention the similarity score or alignment label in your response. The reason should be based on the text of the task and the competence and their estimated normalized similarity score and alignment. @@ -189,7 +193,7 @@ const MATCH_REASON_EXAMPLES: Message[] = [ content: ` Task: Operate CNC milling machines to produce precision metal parts. Competence: Experience with CNC milling machines and precision machining. - Similarity Score: 0.95 + Similarity Score: 0.91 Alignment: aligning `, }, @@ -205,7 +209,7 @@ const MATCH_REASON_EXAMPLES: Message[] = [ content: ` Task: Delivering packages to customers on time. Driving a delivery van safely through city traffic. Loading and unloading packages efficiently. Communicating with customers professionally. Planning optimal delivery routes using GPS technology. Competence: Has no drivers license and cannot operate vehicles. - Similarity Score: 0.16 + Similarity Score: 0.0 Alignment: contradicting `, }, @@ -220,14 +224,14 @@ const MATCH_REASON_EXAMPLES: Message[] = [ content: ` Task: Assemble circuit boards according to schematic diagrams. Competence: Basic knowledge of electronics and soldering skills. - Similarity Score: 0.65 + Similarity Score: 0.08 Alignment: aligning `, }, { role: 'assistant', content: ` - The the statements have a moderate match because while assembling circuit boards requires some knowledge of electronics, it does not specifically require advanced soldering skills. + The the statements only slightly match because while assembling circuit boards requires some knowledge of electronics, it does not specifically require advanced soldering skills. `, }, { @@ -235,19 +239,552 @@ const MATCH_REASON_EXAMPLES: Message[] = [ content: ` Task: Prepare raw materials for production. 
Competence: Experience with inventory management and supply chain logistics. - Similarity Score: 0.49 + Similarity Score: 0 Alignment: neutral `, }, { role: 'assistant', content: ` + The task and competence do not match because preparing raw materials is a basic task that does not require advanced inventory management or supply chain logistics skills. + `, + }, + //////////////////////////////////////////////////////////////// + { + role: 'user', + content: ` + Task: Lead Agile Scrum teams, facilitate sprint planning and retrospectives. + Competence: Certified Scrum Master with 5 years of experience running scrum ceremonies. + Similarity Score: 0.95 + Alignment: aligning + `, + }, + { + role: 'assistant', + content: ` + The competence directly covers leading Scrum teams and facilitating ceremonies, which matches the task's requirements precisely. + `, + }, + + { + role: 'user', + content: ` + Task: Develop native iOS applications in Swift and ship to the App Store. + Competence: Experienced Android engineer, Kotlin and Jetpack Compose. + Similarity Score: 0.25 + Alignment: neutral + `, + }, + { + role: 'assistant', + content: ` + The competence demonstrates mobile-app development experience but on a different platform and language, so it provides some transferable skills but not a direct match. + `, + }, + + { + role: 'user', + content: ` + Task: Translate legal contracts from English to German ensuring legal terminology is correct. + Competence: Native German translator with certification in legal translation. + Similarity Score: 0.92 + Alignment: aligning + `, + }, + { + role: 'assistant', + content: ` + The competence combines native German proficiency with a legal translation credential, matching the task's need for accurate legal terminology. + `, + }, + + { + role: 'user', + content: ` + Task: Manage payroll, calculate taxes, and file payroll reports. + Competence: Certified public accountant (CPA) with payroll processing experience. 
+ Similarity Score: 0.89 + Alignment: aligning + `, + }, + { + role: 'assistant', + content: ` + The competence includes both the certification and hands-on payroll experience required to perform payroll calculation and filings. + `, + }, + + { + role: 'user', + content: ` + Task: Operate hydraulic excavators on construction sites. + Competence: Forklift operator certified with several years of warehouse experience. + Similarity Score: 0.12 + Alignment: neutral + `, + }, + { + role: 'assistant', + content: ` + Both roles involve operating heavy equipment, but the specific machinery and skills differ, so the relevance is limited. + `, + }, + + { + role: 'user', + content: ` + Task: Write unit tests in Python using pytest and maintain test coverage. + Competence: Experienced in writing unit tests in Java with JUnit and some general testing knowledge. + Similarity Score: 0.45 + Alignment: neutral + `, + }, + { + role: 'assistant', + content: ` + The competence shows solid testing experience but in a different language and framework, making it partly relevant but not a perfect fit. + `, + }, + + { + role: 'user', + content: ` + Task: Handle inbound customer support calls with empathy and problem resolution. + Competence: Strong written communication skills and two years of live chat customer support. + Similarity Score: 0.55 + Alignment: aligning + `, + }, + { + role: 'assistant', + content: ` + The competence demonstrates customer-facing communication and support experience that transfers well to voice support, though the channel differs. + `, + }, + + { + role: 'user', + content: ` + Task: Perform blood draws (phlebotomy) and prepare specimens for lab testing. + Competence: Certified phlebotomist with clinical experience. + Similarity Score: 0.99 + Alignment: aligning + `, + }, + { + role: 'assistant', + content: ` + The competence explicitly covers phlebotomy certification and clinical practice, which directly matches the task's duties. 
+ `, + }, + + { + role: 'user', + content: ` + Task: Conduct workplace hazard assessments and maintain safety documentation. + Competence: OSHA 30 certification and experience conducting safety audits. + Similarity Score: 0.87 + Alignment: aligning + `, + }, + { + role: 'assistant', + content: ` + The competence shows the required safety certification and audit experience needed to perform hazard assessments and documentation. + `, + }, + + { + role: 'user', + content: ` + Task: Build high-frequency trading systems in low-latency C++. + Competence: Strong Python data-analysis background and finance domain knowledge. + Similarity Score: 0.30 + Alignment: neutral + `, + }, + { + role: 'assistant', + content: ` + The competence provides valuable finance and data skills, but lacks the specific low-latency C++ systems expertise required for the task. + `, + }, + + { + role: 'user', + content: ` + Task: Lead a team of graphic designers to create brand identity and assets. + Competence: Senior UX designer with a portfolio of visual branding and team leadership. + Similarity Score: 0.70 + Alignment: aligning + `, + }, + { + role: 'assistant', + content: ` + The competence combines visual branding experience and leadership, which fits the task of leading designers to create brand assets. + `, + }, + + { + role: 'user', + content: ` + Task: Translate complex medical research articles accurately into English. + Competence: Marketing copywriter with no medical background. + Similarity Score: 0.0 + Alignment: contradicting + `, + }, + { + role: 'assistant', + content: ` + The task requires specialized medical knowledge and terminology that the competence does not provide. + `, + }, + + { + role: 'user', + content: ` + Task: Ensure PCI-level data protection and handle cardholder data securely. + Competence: History of incidents involving mishandling of sensitive customer data. 
+ Similarity Score: 0.0 + Alignment: contradicting + `, + }, + { + role: 'assistant', + content: ` + The task requires strict data security practices, but the competence indicates past failures to protect sensitive information, creating a direct conflict. + `, + }, + /////////////////////// + { + role: 'user', + content: ` + Task: Install and configure Nginx as a reverse proxy with TLS termination for multiple web services. + Competence: Experience configuring Nginx, handling TLS certificates, and reverse proxy setups for web applications. + Similarity Score: 0.92 + Alignment: aligning + `, + }, + { + role: 'assistant', + content: ` + The competence explicitly covers Nginx configuration, TLS certificate handling, and reverse-proxy setups, which directly match the task. That operational experience maps to the task's technical requirements. Overall this is a strong, practical fit. + `, + }, + + { + role: 'user', + content: ` + Task: Write automated end-to-end tests for a React web application using Playwright. + Competence: Experienced in unit/integration testing in React and in end-to-end tests using Selenium. + Similarity Score: 0.46 + Alignment: neutral + `, + }, + { + role: 'assistant', + content: ` + The competence shows solid testing experience and familiarity with React, which transfers to end-to-end testing concepts. However, the specific test framework differs and Selenium's patterns are not identical to Playwright's modern APIs. The fit is moderately useful but incomplete without Playwright-specific experience. + `, + }, + + { + role: 'user', + content: ` + Task: Manage emergency room triage, including rapid patient assessment and prioritization. + Competence: Registered nurse with prior ER triage experience and ACLS certification. 
+ Similarity Score: 0.99 + Alignment: aligning + `, + }, + { + role: 'assistant', + content: ` + The competence explicitly describes ER triage experience and advanced life support certification, which match the task's clinical and triage responsibilities. Those credentials and practical duties map directly to rapid patient assessment and prioritization. This competence is therefore highly appropriate for the role. + `, + }, + { + role: 'user', + content: ` + Task: Operate retail point-of-sale (POS) systems and handle daily cash reconciliation. + Competence: Previous retail cashier experience including operating POS terminals and performing end-of-day cash counts. + Similarity Score: 0.74 + Alignment: aligning + `, + }, + { + role: 'assistant', + content: ` + The competence lists the exact operational duties — running POS systems and reconciling cash — which align with the task. That hands-on experience covers most daily requirements and reduces training needs. Overall the candidate is well suited for this role. + `, + }, + { + role: 'user', + content: ` + Task: Ensure GDPR-compliant handling of personal data, manage consent records, and perform regular data protection impact assessments. + Competence: Proven track record implementing data protection policies and running DPIAs. + Similarity Score: 0.81 + Alignment: aligning + `, + }, + { + role: 'assistant', + content: ` + The competence demonstrates direct experience implementing data-protection policies and performing impact assessments, which correspond to the task's key responsibilities. Those policy and assessment skills indicate strong familiarity with GDPR-related workflows and controls. The competence is therefore a very good fit for the task. + `, + }, + + { + role: 'user', + content: ` + Task: Drive a passenger shuttle daily and secure vehicle inspections. + Competence: Medical restriction explicitly prohibiting driving. 
+ Similarity Score: 0.60 + Alignment: contradicting + `, + }, + { + role: 'assistant', + content: ` + Although the numeric score suggests some overlap, the competence text explicitly prohibits driving while the task requires operating a vehicle. This direct conflict with the task's core requirement prevents the candidate from performing the role. The competence therefore does not meet the task. + `, + }, + ///////////////// + { + role: 'user', + content: ` + Task: Assemble circuit boards according to schematic diagrams. + Competence: Basic knowledge of electronics and soldering skills. + Similarity Score: 0.08 + Alignment: aligning + `, + }, + { + role: 'assistant', + content: ` + The statements only slightly match because assembling circuit boards requires more specific hands-on PCB assembly experience than basic electronics knowledge. Basic soldering helps, but the competence doesn't explicitly show the detailed assembly practice the task expects. The overlap is therefore minimal. + `, + }, + + { + role: 'user', + content: ` + Task: Translate engineering user manuals from Chinese to English, preserving technical terms and measurements. + Competence: Professional Chinese-to-English translator with experience in marketing and legal documents, no engineering background. + Similarity Score: 0.12 + Alignment: neutral + `, + }, + { + role: 'assistant', + content: ` + The competence provides strong language and translation skills but lacks the technical engineering background needed to preserve specialized terminology and measurements. That domain gap limits suitability for technical manual translation without subject-matter support. The fit is therefore only slight. + `, + }, + { + role: 'user', + content: ` + Task: Administer Microsoft SQL Server databases, tune queries and manage backups. + Competence: Extensive experience with MySQL and PostgreSQL administration. 
+ Similarity Score: 0.40 + Alignment: neutral + `, + }, + { + role: 'assistant', + content: ` + The competence indicates strong database administration skills and general principles that transfer across systems, but it lacks direct experience with Microsoft SQL Server-specific tools and features. That gap will require adaptation to server-specific administration and tuning. The candidate is moderately relevant but not an immediate fit. + `, + }, + { + role: 'user', + content: ` + Task: Improve front-end performance for a React single-page app, including code-splitting and profiling render hotspots. + Competence: Experienced React developer who has optimized bundle sizes and refactored components but limited experience with advanced profiler workflows. + Similarity Score: 0.50 + Alignment: neutral + `, + }, + { + role: 'assistant', + content: ` + The competence brings directly relevant front-end and React experience, including bundle optimization and component improvements. However, the candidate's limited exposure to advanced profiling workflows means they may need to upskill for deep-render hotspot analysis. Overall, they are reasonably well placed to improve performance but may require some additional profiling experience. + `, + }, + + { + role: 'user', + content: ` + Task: Build SQL analytics queries and ETL jobs in Amazon Redshift, optimize for large datasets. + Competence: Strong SQL and analytics experience with PostgreSQL and routine ETL pipelines, but no Redshift-specific deployments. + Similarity Score: 0.52 + Alignment: aligning + `, + }, + { + role: 'assistant', + content: ` + The competence demonstrates solid SQL and ETL experience that transfers to analytics work, and understanding of query design is directly useful for Redshift. The lack of Redshift-specific tuning and cluster knowledge is a remaining gap that will affect large-scale optimizations. The candidate is generally well suited but will need platform-specific tuning skills. 
+ `, + }, + + { + role: 'user', + content: ` + Task: Onboard mid-market customers to a SaaS product and drive adoption through structured training and playbooks. + Competence: Account manager with experience in customer onboarding for software services, but mostly for on-premise deployments and manual training sessions. + Similarity Score: 0.48 + Alignment: neutral + `, + }, + { + role: 'assistant', + content: ` + The competence shows relevant onboarding and customer engagement experience that transfers to SaaS adoption activities. However, the difference between on-premise manual onboarding and recurring SaaS flows means playbook-driven, product-led onboarding skills may be missing. The candidate can perform many onboarding tasks but will need to adapt to SaaS-specific tooling and processes. + `, + }, + + { + role: 'user', + content: ` + Task: Deploy machine-learning models to production with CI/CD, containerization, and monitoring. + Competence: ML researcher experienced in model development and validation, with limited DevOps/infrastructure deployment experience. + Similarity Score: 0.47 + Alignment: neutral + `, + }, + { + role: 'assistant', + content: ` + The competence is strong on model development and evaluation, which covers a critical portion of the workflow. However, the task requires production deployment skills—CI/CD pipelines, containerization, and monitoring—that are not evident in the competence. The fit is partial: good for the modeling side but not yet complete for end-to-end deployment. + `, + }, + + { + role: 'user', + content: ` + Task: Provide German-language phone-based customer support for technical B2B software. + Competence: Native German speaker with one year of live chat support experience on a technical product. 
+ Similarity Score: 0.55 + Alignment: aligning + `, + }, + { + role: 'assistant', + content: ` + The competence supplies native-level language skills and hands-on technical support experience, which transfer well to customer-facing roles. Phone-based support requires real-time verbal communication and some nuances compared with chat, but the underlying technical troubleshooting ability is present. Overall this competence is a good fit, with small channel-specific adaptation required. + `, + }, +]; + +const MATCH_REASON_INSTRUCT: Message = { + role: 'system', + content: ` +You are an expert in generating concise reasons that explain why a Task and a Competence match to the degree they do. + +Core rules: +- Produce exactly 1–3 short, concise sentences that explain why the task and competence matched (or did not). Use active voice and concrete wording. +- Base the reason only on the text of the Task and the Competence and implicitly on the provided similarity score and alignment label. Do NOT invent facts. +- Do NOT mention the numeric similarity score or the alignment label in the reason. +- Avoid speculation, generic filler, or unrelated information. Keep reasons grounded in wording present in the two texts. +- If texts explicitly conflict (for example, task requires driving and competence states inability to drive), explain the conflict clearly and directly without naming the label. +- When the texts imply partial transferability (different platform/language/tool but overlapping concepts), explain which parts transfer and which gaps remain. +- Keep tone neutral and factual. + +Score interpretation (matches your semantics): +- 0.00 : Not suited — no suitability (either direct contradiction or no overlap). +- Greater than 0 up to 0.20 (0 < score ≤ 0.20) : Slightly suited — small degree of relevance or very limited transferability. 
+- Greater than 0.20 up to 0.50 (0.20 < score ≤ 0.50) : Somewhat suited — a noticeable but incomplete overlap; useful for partial tasks or with training. +- Greater than 0.50 up to 0.70 (0.50 < score ≤ 0.70) : Quite well suited — competence covers most requirements with small gaps. +- Greater than 0.70 up to 0.85 (0.70 < score ≤ 0.85) : Very well suited — strong coverage of task requirements. +- Greater than 0.85 up to 1.00 (0.85 < score ≤ 1.00) : Close to perfectly suited — competence largely or fully covers the task. + +Important handling of inconsistencies: +- If the numeric score and the alignment label conflict (for example, a nonzero high score but an alignment that indicates explicit contradiction), prefer the alignment label when the competence text clearly contradicts the task. In that case, produce a reason that highlights the explicit conflicting statements in the texts. +- Otherwise, use the score and label implicitly to set the tone and specificity of the reason. + +Output constraints: +- 1–3 short sentences only. +- Do NOT include numeric values or alignment labels in the output. +- Be specific: reference the precise skill, tool, domain, or limitation that explains the match degree. +`, +}; + +// export const MATCH_REASON: Message[] = [MATCH_REASON_INTRUCT, ...MATCH_REASON_EXAMPLES]; + +const MATCH_REASON_INTSRUCT_2: Message = { + role: 'system', + content: ` + You are an expert in generating matching scores based on reason between tasks and competences. + Your task is to generate a score, how well the resource with the respective capability is suited to fulfill the given task. + The score should be a number (floating point) between 0 and 1, where 0 means either not suited (i.e. the capability and task either contradict or are not overlapping in terms of competences, e.g. they are unrelated) and 1 means perfectly suited (i.e. the capability fully covers the task). 
+ So values larger than 0 already indicate some degree of suitability, while values close to 1 indicate a very good match. + Something that is neither well suited nor unsuited should still be rated with 0. + 0.25 would indicate that the resource is only slightly suited to perform the task. + So 0.5 is not a neutral value, but rather indicates that the resource is somewhat well suited to perform the task, but not very well suited. + 0.75 would indicate that the resource is quite well suited to perform the task, but not perfectly suited. + The reason should be one to three short, concise sentence that explain why the task and competence match as well as they did or why they did not match that well. + The reason should be based on the text of the task and the competence and their estimated normalized similarity score and alignment. + The similarity score is a number between 0 and 1. + Your response should be in the following format: + + ${splittingSymbol} + + `, +}; + +const MATCH_REASON_EXAMPLES_2: Message[] = [ + { + role: 'user', + content: ` + Task: Operate CNC milling machines to produce precision metal parts. + Competence: Experience with CNC milling machines and precision machining. + `, + }, + { + role: 'assistant', + content: ` + 0.95 + ${splittingSymbol} + The statements match very well because the task requires operating CNC milling machines, which is exactly what the competence is about. + `, + }, + + { + role: 'user', + content: ` + Task: Delivering packages to customers on time. Driving a delivery van safely through city traffic. Loading and unloading packages efficiently. Communicating with customers professionally. Planning optimal delivery routes using GPS technology. + Competence: Has no drivers license and cannot operate vehicles. + `, + }, + { + role: 'assistant', + content: ` + 0.0 + ${splittingSymbol} + The statements do not match. 
The task requires driving a delivery van, but the competence indicates that the person cannot operate vehicles at all. + `, + }, + { + role: 'user', + content: ` + Task: Prepare raw materials for production. + Competence: Experience with inventory management and supply chain logistics. + `, + }, + { + role: 'assistant', + content: ` + 0.30 + ${splittingSymbol} The statements have a relativly low match because preparing raw materials is a basic task that does not require advanced inventory management or supply chain logistics skills. `, }, ]; -export const MATCH_REASON: Message[] = [MATCH_REASON_INTRUCT, ...MATCH_REASON_EXAMPLES]; +export const MATCH_REASON: Message[] = [MATCH_REASON_INSTRUCT, ...MATCH_REASON_EXAMPLES]; /** * ------------------------------------------------------------- diff --git a/src/competence-matcher/src/worker/embedder.ts b/src/competence-matcher/src/worker/embedder.ts index 83543147d..28b70ae73 100644 --- a/src/competence-matcher/src/worker/embedder.ts +++ b/src/competence-matcher/src/worker/embedder.ts @@ -24,7 +24,7 @@ async function ensureModelsInitialised() { try { await Embedding.getInstance(); modelsInitialised = true; - workerLogger('system', 'info', 'Embedder worker model initialized', { threadId }); + workerLogger('system', 'info', 'Embedder worker model initialised', { threadId }); } catch (err) { throw err; } @@ -56,7 +56,7 @@ parentPort.on('message', async (message: any) => { // Handle job messages const job = message as EmbeddingJob; - // ensure models are initialized (but do not run this for health_check) + // ensure models are initialised (but do not run this for health_check) try { await ensureModelsInitialised(); } catch (err) { diff --git a/src/competence-matcher/src/worker/matcher.ts b/src/competence-matcher/src/worker/matcher.ts index 287cd931e..78bccddff 100644 --- a/src/competence-matcher/src/worker/matcher.ts +++ b/src/competence-matcher/src/worker/matcher.ts @@ -3,7 +3,8 @@ import Embedding from '../tasks/embedding'; 
import { withJobUpdates, workerLogger } from '../utils/worker'; import { addReason } from '../tasks/reason'; import { Match, MatchingJob } from '../utils/types'; -import ZeroShot from '../tasks/semantic-zeroshot'; +import ZeroShot, { labels } from '../tasks/semantic-zeroshot'; +import CrossEncoder from '../tasks/cross-encode'; // // Initialise models on startup // try { @@ -27,7 +28,7 @@ async function ensureModelsInitialised() { await Embedding.getInstance(); await ZeroShot.getInstance(); modelsInitialised = true; - workerLogger('system', 'info', 'Matcher worker models initialized', { threadId }); + workerLogger('system', 'info', 'Matcher worker models initialised', { threadId }); } catch (err) { // Bubble up so job handling can report the error throw err; @@ -120,47 +121,29 @@ parentPort.on('message', async (message: any) => { }, }); - // TODO: Re-enable reasoning once worker stability issues are resolved - // Apply reasoning to each match to enhance context - // matches = await addReason(description, matches); - - // Zero-shot classification for scaling scores based on alignment - const scalingLabels = ['conflicting', 'neutral', 'aligning']; - const labelScalar = [0.05, 0.25, 1]; - // Process each match for (const match of matches) { let flag = 'neutral'; // Default flag - // Apply zero-shot classification - const scalingClassification = await ZeroShot.classify( - `Task: ${description} | Competence: ${match.text}`, - scalingLabels, - ); - - if (scalingClassification) { - if ( - // @ts-ignore - ZeroShot classification result structure - scalingClassification.labels[0] === scalingLabels[2] && - // @ts-ignore - scalingClassification.scores[0] > 0.65 - ) { - // Perfect match - keep as is - match.distance *= labelScalar[2]; - flag = 'aligning'; - } - // @ts-ignore - ZeroShot classification result structure - else if (scalingClassification.labels[0] === scalingLabels[1]) { - // Mediocre match - scale it down - match.distance *= labelScalar[1]; - flag = 'neutral'; - } 
- // @ts-ignore - ZeroShot classification result structure - else if (scalingClassification.labels[0] === scalingLabels[0]) { - // Poor match - scale it down significantly - match.distance *= labelScalar[0]; - flag = 'contradicting'; - } + // Balance distance + let newDistance = Math.min(1, Math.max(0, match.distance - 0.45) * 2); + + // Get Alignment via Zero-Shot + const alignment = await ZeroShot.nliBiDirectional(description, match.text); + + // First: Contradicting? + if (alignment.ranking[0] == 'contradict' || alignment.contradict > 0.3) { + flag = 'contradicting'; + newDistance = 0.0; + // Second: Aligning? + } else if (alignment.entail > 0.55 && match.distance > 0.65) { + flag = 'aligning'; + // Boost similarity-based distance + newDistance = Math.min(1, newDistance * 1.65); + } else { + flag = 'neutral'; + // Reduce distance for neutral + newDistance *= 0.75; } // Store match result for reasoning workaround @@ -173,7 +156,7 @@ parentPort.on('message', async (message: any) => { text: match.text, type: match.type as 'name' | 'description' | 'proficiencyLevel', alignment: flag, - distance: match.distance, + distance: newDistance, reason: match.reason, }); } From f76b8b09a4b26e8dc71363e5fcfc9300bdfbeb3c Mon Sep 17 00:00:00 2001 From: Maxi <76120220+MaxiLein@users.noreply.github.com> Date: Tue, 30 Sep 2025 13:06:16 +0200 Subject: [PATCH 25/48] Enhance ZeroShot integration: add contradiction and alignment checks in matcher --- .../src/tasks/semantic-zeroshot.ts | 122 +++++++++++++++++- src/competence-matcher/src/worker/matcher.ts | 22 +++- 2 files changed, 134 insertions(+), 10 deletions(-) diff --git a/src/competence-matcher/src/tasks/semantic-zeroshot.ts b/src/competence-matcher/src/tasks/semantic-zeroshot.ts index fa3454842..662c7d63a 100644 --- a/src/competence-matcher/src/tasks/semantic-zeroshot.ts +++ b/src/competence-matcher/src/tasks/semantic-zeroshot.ts @@ -91,16 +91,20 @@ export default class ZeroShot extends TransformerPipeline can they perform the 
task? const h2 = `The described task is a/an {} to the capability: ${capability}.`; // task as premise -> does it require the capability? - const [dir1, dir2] = await Promise.all([ + const h3 = `The task and capability are a/an {}.`; // symmetric + const mix = `Task: ${task}\nCapability: ${capability}`; + + const [dir1, dir2, dir3] = await Promise.all([ this.nliDirection(capability, h1), this.nliDirection(task, h2), + this.nliDirection(mix, h3), ]); const result = { - entail: (dir1.entail + dir2.entail) / 2, - neutral: (dir1.neutral + dir2.neutral) / 2, - contradict: Math.max(dir1.contradict, dir2.contradict), - details: { 'competence on task': dir1, 'task on competence': dir2 }, + entail: (dir1.entail + dir2.entail + dir3.entail) / 3, + neutral: (dir1.neutral + dir2.neutral + dir3.neutral) / 3, + contradict: Math.max(dir1.contradict, dir2.contradict, dir3.contradict), // max contradict + details: { 'competence on task': dir1, 'task on competence': dir2, combined: dir3 }, }; // Order the labels by their score in descending order @@ -114,4 +118,112 @@ export default class ZeroShot extends TransformerPipeline(); + + const h1 = `The capability is {} to the task: ${task}.`; // capability as premise -> can they perform the task? + const h2 = `The task is {} to the capability: ${capability}.`; // task as premise -> does it require the capability? 
+ + const h3 = `The task and capability are {}.`; // symmetric + const mix = `Task: ${task}\nCapability: ${capability}`; + + const [out1, out2, out3] = await Promise.all([ + pipe(capability, labels, { hypothesis_template: h1 }), + pipe(task, labels, { hypothesis_template: h2 }), + pipe(mix, labels, { hypothesis_template: h3 }), + ]); + + // console.log('________________________________________'); + // console.log('contradiction check results:'); + // console.log('task', task); + // console.log('capability', capability); + // console.log(out1, out2, out3); + // console.log('________________________________________'); + + const sortedOut = labels.reduce( + (acc: Record, label: string) => { + if (acc[label] === undefined) acc[label] = []; + [out1, out2, out3].forEach((out) => { + // Find index of label in out + const idx = ((out as any).labels as string[]).indexOf(label); + // Add corresponding score + if (idx >= 0) acc[label].push((out as any).scores[idx]); + }); + + return acc; + }, + {} as Record, + ); + + const result = { + max: Math.max(...(sortedOut[labels[0]] || [0])), + avg: + (sortedOut[labels[0]] || [0]).reduce((sum, val) => sum + val, 0) / + (sortedOut[labels[0]] || [0]).length, + details: { 'competence on task': out1, 'task on competence': out2, combined: out3 }, + }; + + return { + ...result, + contradicting: result.max > 0.5 || result.avg > 0.45, + }; + } + + public static async alignmentCheck(task: string, capability: string) { + const labels = ['somewhat sufficently', 'only partially', 'not at all']; + const pipe = await this.getInstance(); + + const h1 = `The capability is meeting the requirements of the task "${task}" {}.`; + const h2 = `The tasks requirements are met {} by the capability: "${capability}".`; + + const h3 = `The task and capability are {} matching.`; // symmetric + const mix = `Task: "${task}"\nCapability: "${capability}"`; + + const [out1, out2, out3] = await Promise.all([ + pipe(capability, labels, { hypothesis_template: h1 }), + 
pipe(task, labels, { hypothesis_template: h2 }), + pipe(mix, labels, { hypothesis_template: h3 }), + ]); + + // console.log('________________________________________'); + // console.log('alignment check results:'); + // console.log('task: ', task); + // console.log('capability: ', capability); + // console.log(out1, out2, out3); + // console.log('________________________________________'); + + const sortedOut = labels.reduce( + (acc: Record, label: string) => { + if (acc[label] === undefined) acc[label] = []; + [out1, out2, out3].forEach((out) => { + // Find index of label in out + const idx = ((out as any).labels as string[]).indexOf(label); + // Add corresponding score + if (idx >= 0) acc[label].push((out as any).scores[idx]); + }); + + return acc; + }, + {} as Record, + ); + + // console.log('________________________________________'); + // console.log('sortedOut: ', sortedOut); + // console.log('________________________________________'); + + const result = { + max: Math.max(...(sortedOut[labels[0]] || [0])), + avg: + (sortedOut[labels[0]] || [0]).reduce((sum, val) => sum + val, 0) / + (sortedOut[labels[0]] || [0]).length, + details: { 'competence on task': out1, 'task on competence': out2, combined: out3 }, + }; + + return { + ...result, + aligning: result.max > 0.65 && result.avg > 0.5, + }; + } } diff --git a/src/competence-matcher/src/worker/matcher.ts b/src/competence-matcher/src/worker/matcher.ts index 78bccddff..867586938 100644 --- a/src/competence-matcher/src/worker/matcher.ts +++ b/src/competence-matcher/src/worker/matcher.ts @@ -129,21 +129,33 @@ parentPort.on('message', async (message: any) => { let newDistance = Math.min(1, Math.max(0, match.distance - 0.45) * 2); // Get Alignment via Zero-Shot - const alignment = await ZeroShot.nliBiDirectional(description, match.text); + const sentiment = await ZeroShot.nliBiDirectional(description, match.text); + + const contradiction = await ZeroShot.contradictionCheck(description, match.text); + const 
alignment = await ZeroShot.alignmentCheck(description, match.text); + + // console.log('task: ', description); + // console.log('capability: ', match.text); + // console.log('alignment: ', alignment); + // console.log('________________________________________'); // First: Contradicting? - if (alignment.ranking[0] == 'contradict' || alignment.contradict > 0.3) { + if ( + sentiment.ranking[0] == 'contradict' || + sentiment.contradict > 0.3 || + contradiction.contradicting + ) { flag = 'contradicting'; newDistance = 0.0; // Second: Aligning? - } else if (alignment.entail > 0.55 && match.distance > 0.65) { + } else if (sentiment.entail > 0.55 && match.distance > 0.65 && alignment.aligning) { flag = 'aligning'; // Boost similarity-based distance - newDistance = Math.min(1, newDistance * 1.65); + newDistance = Math.min(1, newDistance * 1.5); } else { flag = 'neutral'; // Reduce distance for neutral - newDistance *= 0.75; + newDistance *= 0.65; } // Store match result for reasoning workaround From 10281ab4d5e129450b6833de639073cae6d07102 Mon Sep 17 00:00:00 2001 From: Maxi <76120220+MaxiLein@users.noreply.github.com> Date: Sat, 4 Oct 2025 13:08:29 +0200 Subject: [PATCH 26/48] Get build to run --- .../src/middleware/match.ts | 175 +++++++++--------- src/competence-matcher/src/utils/prompts.ts | 2 +- src/competence-matcher/tsconfig.json | 5 +- 3 files changed, 94 insertions(+), 88 deletions(-) diff --git a/src/competence-matcher/src/middleware/match.ts b/src/competence-matcher/src/middleware/match.ts index 32f2651da..9a6b83a66 100644 --- a/src/competence-matcher/src/middleware/match.ts +++ b/src/competence-matcher/src/middleware/match.ts @@ -206,12 +206,34 @@ export function matchCompetenceList(req: Request, res: Response, next: NextFunct ); } - handleCreateResourceList(req.dbName!, list, (job, code, jobId) => { - try { - // Embedding fails -> no matching possible (i.e. 
fail the matching job) - if (code !== 0) { + handleCreateResourceList( + req.dbName!, + list, + requestId, + (job: any, code: number, jobId: string) => { + try { + // Embedding fails -> no matching possible (i.e. fail the matching job) + if (code !== 0) { + try { + db.updateJobStatus(matchingJobId, 'failed'); + } catch (error) { + const dbError = new DatabaseError( + 'updateJobStatus', + error instanceof Error ? error : new Error(String(error)), + requestId, + ); + logger.databaseError( + 'Failed to update job status to failed', + dbError, + { matchingJobId }, + requestId, + ); + } + return; + } + try { - db.updateJobStatus(matchingJobId, 'failed'); + db.updateJobStatus(matchingJobId, 'pending'); } catch (error) { const dbError = new DatabaseError( 'updateJobStatus', @@ -219,100 +241,83 @@ export function matchCompetenceList(req: Request, res: Response, next: NextFunct requestId, ); logger.databaseError( - 'Failed to update job status to failed', + 'Failed to update job status to pending', dbError, { matchingJobId }, requestId, ); + return; } - return; - } - try { - db.updateJobStatus(matchingJobId, 'pending'); - } catch (error) { - const dbError = new DatabaseError( - 'updateJobStatus', - error instanceof Error ? error : new Error(String(error)), - requestId, - ); - logger.databaseError( - 'Failed to update job status to pending', - dbError, - { matchingJobId }, - requestId, - ); - return; - } + // Retrieve the competence list ID + const { referenceId: listId } = db.getJob(jobId); + + // Create the matching job + const matchingJob: MatchingJob = { + jobId: matchingJobId, + dbName: req.dbName!, + listId, + resourceId: undefined, // For now, we don't support matching against a single resource + tasks: taskInput!.map((task) => { + return { + taskId: task.taskId, + name: task.name, + description: task.description, + executionInstructions: task.executionInstructions, + requiredCompetencies: (task.requiredCompetencies ?? 
[]).map((competence) => + typeof competence === 'string' + ? (competence as string) + : ({ + competenceId: competence.competenceId, + name: competence.name, + description: competence.description, + externalQualificationNeeded: competence.externalQualificationNeeded, + renewTime: competence.renewTime, + proficiencyLevel: competence.proficiencyLevel, + qualificationDates: competence.qualificationDates, + lastUsages: competence.lastUsages, + } as CompetenceInput), + ) as string[] | CompetenceInput[], + }; + }), + }; - // Retrieve the competence list ID - const { referenceId: listId } = db.getJob(jobId); - - // Create the matching job - const matchingJob: MatchingJob = { - jobId: matchingJobId, - dbName: req.dbName!, - listId, - resourceId: undefined, // For now, we don't support matching against a single resource - tasks: taskInput!.map((task) => { - return { - taskId: task.taskId, - name: task.name, - description: task.description, - executionInstructions: task.executionInstructions, - requiredCompetencies: (task.requiredCompetencies ?? []).map((competence) => - typeof competence === 'string' - ? (competence as string) - : ({ - competenceId: competence.competenceId, - name: competence.name, - description: competence.description, - externalQualificationNeeded: competence.externalQualificationNeeded, - renewTime: competence.renewTime, - proficiencyLevel: competence.proficiencyLevel, - qualificationDates: competence.qualificationDates, - lastUsages: competence.lastUsages, - } as CompetenceInput), - ) as string[] | CompetenceInput[], - }; - }), - }; + // Enqueue the matching job + workerManager.enqueue(matchingJob, 'matcher'); + } catch (error) { + try { + db.updateJobStatus(matchingJobId, 'failed'); + } catch (dbError) { + const dbErrorObj = new DatabaseError( + 'updateJobStatus', + dbError instanceof Error ? 
dbError : new Error(String(dbError)), + requestId, + ); + logger.databaseError( + 'Failed to update job status after error', + dbErrorObj, + { matchingJobId }, + requestId, + ); + } - // Enqueue the matching job - workerManager.enqueue(matchingJob, 'matcher'); - } catch (error) { - try { - db.updateJobStatus(matchingJobId, 'failed'); - } catch (dbError) { - const dbErrorObj = new DatabaseError( - 'updateJobStatus', - dbError instanceof Error ? dbError : new Error(String(dbError)), + const matchingError = new CompetenceMatcherError( + `Failed to create inline matching job: ${error instanceof Error ? error.message : String(error)}`, + 'inline_job_creation', + 500, requestId, + { matchingJobId }, ); - logger.databaseError( - 'Failed to update job status after error', - dbErrorObj, + logger.error( + 'request', + 'Failed to create inline matching job', + matchingError, { matchingJobId }, requestId, ); } - - const matchingError = new CompetenceMatcherError( - `Failed to create inline matching job: ${error instanceof Error ? error.message : String(error)}`, - 'inline_job_creation', - 500, - requestId, - { matchingJobId }, - ); - logger.error( - 'request', - 'Failed to create inline matching job', - matchingError, - { matchingJobId }, - requestId, - ); - } - }); + }, + ); res .setHeader('Location', `${PATHS.match}/jobs/${matchingJobId}`) diff --git a/src/competence-matcher/src/utils/prompts.ts b/src/competence-matcher/src/utils/prompts.ts index df3dc5d48..eab19af6c 100644 --- a/src/competence-matcher/src/utils/prompts.ts +++ b/src/competence-matcher/src/utils/prompts.ts @@ -495,7 +495,7 @@ const MATCH_REASON_EXAMPLES: Message[] = [ content: ` Task: Manage emergency room triage, including rapid patient assessment and prioritization. Competence: Registered nurse with prior ER triage experience and ACLS certification. 
- Similarity Score: 0.99 + Similarity Score: 1 Alignment: aligning `, }, diff --git a/src/competence-matcher/tsconfig.json b/src/competence-matcher/tsconfig.json index c612291b4..c7bba6d11 100644 --- a/src/competence-matcher/tsconfig.json +++ b/src/competence-matcher/tsconfig.json @@ -1,13 +1,14 @@ { "compilerOptions": { - "lib": ["esnext"], + "lib": ["esnext", "dom"], "target": "ES2020", "module": "commonjs", "rootDir": "src", "outDir": "dist", "strict": true, "esModuleInterop": true, - "moduleResolution": "node" + "moduleResolution": "node", + "skipLibCheck": true }, "include": ["src"] } From 16bfe284bb43d6c4d42f491f91af3382e2b8b330 Mon Sep 17 00:00:00 2001 From: Maxi <76120220+MaxiLein@users.noreply.github.com> Date: Sat, 4 Oct 2025 13:11:33 +0200 Subject: [PATCH 27/48] rename "run-production" to "production" --- src/competence-matcher/package.json | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/competence-matcher/package.json b/src/competence-matcher/package.json index 3487a1c03..e0072315e 100644 --- a/src/competence-matcher/package.json +++ b/src/competence-matcher/package.json @@ -6,7 +6,7 @@ "scripts": { "dev": "ts-node-dev --respawn --transpile-only --watch .env src/server.ts", "build": "tsc", - "run-production": "node dist/server.js" + "production": "node dist/server.js" }, "repository": { "type": "git", @@ -39,4 +39,4 @@ "engines": { "node": ">=23.5.0" } -} +} \ No newline at end of file From e220d58cbd783eaf61eb3260c3be451f7450f572 Mon Sep 17 00:00:00 2001 From: Maxi <76120220+MaxiLein@users.noreply.github.com> Date: Sun, 5 Oct 2025 13:23:52 +0200 Subject: [PATCH 28/48] Refactor Worker Manager and Worker Pool for improved job handling and error management - Consolidated job queue management in WorkerPool with enhanced logging. - Introduced heartbeat mechanism for worker health monitoring. - Improved error handling for job failures with retry logic. 
- Simplified worker lifecycle management, including worker creation and termination. - Enhanced logging for better observability of worker states and job processing. - Updated WorkerManager to streamline job enqueuing and processing. - Added shutdown functionality to gracefully terminate workers and clear job queues. --- src/competence-matcher/src/config.ts | 3 +- .../src/middleware/match.ts | 172 +-- .../src/middleware/resource.ts | 79 +- src/competence-matcher/src/server.ts | 6 - src/competence-matcher/src/utils/types.ts | 7 + src/competence-matcher/src/utils/worker.ts | 43 +- src/competence-matcher/src/worker/embedder.ts | 58 +- src/competence-matcher/src/worker/matcher.ts | 60 +- .../src/worker/worker-manager.ts | 1214 +++++------------ 9 files changed, 557 insertions(+), 1085 deletions(-) diff --git a/src/competence-matcher/src/config.ts b/src/competence-matcher/src/config.ts index 50888aecd..1864b3ff3 100644 --- a/src/competence-matcher/src/config.ts +++ b/src/competence-matcher/src/config.ts @@ -21,9 +21,10 @@ export const config = { splittingLength: parseInt(process.env.SPLITTING_LENGTH || '1000', 10), // Set this to 0 to disable splitting reasonModel: process.env.REASON_MODEL || 'llama3.2', splittingSymbol: process.env.SPLITTING_SYMBOL || '', - maxWorkerThreads: parseInt(process.env.NUMBER_OF_THREADS || String(os.cpus().length - 1), 10), // -1 for main thread (kept for backward compatibility) embeddingWorkers: parseInt(process.env.EMBEDDING_WORKERS || '1', 10), // Number of embedding workers to keep alive matchingWorkers: parseInt(process.env.MATCHING_WORKERS || '1', 10), // Number of matching workers to keep alive + workerHeartbeatInterval: parseInt(process.env.WORKER_HEARTBEAT_INTERVAL || '30', 10) * 1_000, // Worker heartbeat interval in seconds (converted to ms) - how often workers send heartbeats + workerDeathTimeout: parseInt(process.env.WORKER_DEATH_TIMEOUT || '45', 10) * 1_000, // Worker death timeout in seconds (converted to ms) - how long to wait 
before considering worker dead maxJobTime: parseInt(process.env.MAX_JOB_TIME || '600', 10) * 1_000, // converted from seconds to milliseconds logLevel: process.env.LOG_LEVEL || 'INFO', // Levels: 'DEBUG', 'INFO', 'WARN', 'ERROR' logTypes: process.env.LOG_TYPES || 'server,request,worker,database,model,system', diff --git a/src/competence-matcher/src/middleware/match.ts b/src/competence-matcher/src/middleware/match.ts index 9a6b83a66..15dfb1a74 100644 --- a/src/competence-matcher/src/middleware/match.ts +++ b/src/competence-matcher/src/middleware/match.ts @@ -206,123 +206,79 @@ export function matchCompetenceList(req: Request, res: Response, next: NextFunct ); } - handleCreateResourceList( - req.dbName!, - list, - requestId, - (job: any, code: number, jobId: string) => { - try { - // Embedding fails -> no matching possible (i.e. fail the matching job) - if (code !== 0) { - try { - db.updateJobStatus(matchingJobId, 'failed'); - } catch (error) { - const dbError = new DatabaseError( - 'updateJobStatus', - error instanceof Error ? error : new Error(String(error)), - requestId, - ); - logger.databaseError( - 'Failed to update job status to failed', - dbError, - { matchingJobId }, - requestId, - ); - } - return; - } - - try { - db.updateJobStatus(matchingJobId, 'pending'); - } catch (error) { - const dbError = new DatabaseError( - 'updateJobStatus', - error instanceof Error ? 
error : new Error(String(error)), - requestId, - ); - logger.databaseError( - 'Failed to update job status to pending', - dbError, - { matchingJobId }, - requestId, - ); - return; - } - - // Retrieve the competence list ID - const { referenceId: listId } = db.getJob(jobId); - - // Create the matching job - const matchingJob: MatchingJob = { - jobId: matchingJobId, - dbName: req.dbName!, - listId, - resourceId: undefined, // For now, we don't support matching against a single resource - tasks: taskInput!.map((task) => { - return { - taskId: task.taskId, - name: task.name, - description: task.description, - executionInstructions: task.executionInstructions, - requiredCompetencies: (task.requiredCompetencies ?? []).map((competence) => - typeof competence === 'string' - ? (competence as string) - : ({ - competenceId: competence.competenceId, - name: competence.name, - description: competence.description, - externalQualificationNeeded: competence.externalQualificationNeeded, - renewTime: competence.renewTime, - proficiencyLevel: competence.proficiencyLevel, - qualificationDates: competence.qualificationDates, - lastUsages: competence.lastUsages, - } as CompetenceInput), - ) as string[] | CompetenceInput[], - }; - }), - }; + // Start the embedding job and get the promise for completion + const embeddingResult = handleCreateResourceList(req.dbName!, list, requestId); + + // Send response immediately with the matching job ID + res + .setHeader('Location', `${PATHS.match}/jobs/${matchingJobId}`) + .status(202) + .json({ jobId: matchingJobId, status: 'preprocessing' }); // Match the actual database status - // Enqueue the matching job - workerManager.enqueue(matchingJob, 'matcher'); + // Chain the matching job to start after embedding completes + embeddingResult.promise + .then(() => { + // Embedding is done, now update matching job status and start it + try { + db.updateJobStatus(matchingJobId, 'pending'); } catch (error) { - try { - db.updateJobStatus(matchingJobId, 
'failed'); - } catch (dbError) { - const dbErrorObj = new DatabaseError( - 'updateJobStatus', - dbError instanceof Error ? dbError : new Error(String(dbError)), - requestId, - ); - logger.databaseError( - 'Failed to update job status after error', - dbErrorObj, - { matchingJobId }, - requestId, - ); - } - - const matchingError = new CompetenceMatcherError( - `Failed to create inline matching job: ${error instanceof Error ? error.message : String(error)}`, - 'inline_job_creation', - 500, + logger.error( + 'system', + 'Failed to update matching job status to pending', + error instanceof Error ? error : new Error(String(error)), + {}, requestId, - { matchingJobId }, ); + } + + const matchingJob: MatchingJob = { + jobId: matchingJobId, + dbName: req.dbName!, + listId: embeddingResult.listId, // Use the embedding result's listId + resourceId: undefined, + tasks: taskInput!.map((task) => { + return { + taskId: task.taskId, + name: task.name, + description: task.description, + executionInstructions: task.executionInstructions, + requiredCompetencies: (task.requiredCompetencies ?? []).map((competence) => + typeof competence === 'string' + ? 
(competence as string) + : ({ + competenceId: competence.competenceId, + name: competence.name, + description: competence.description, + externalQualificationNeeded: competence.externalQualificationNeeded, + renewTime: competence.renewTime, + proficiencyLevel: competence.proficiencyLevel, + qualificationDates: competence.qualificationDates, + lastUsages: competence.lastUsages, + } as CompetenceInput), + ) as string[] | CompetenceInput[], + }; + }), + }; + + workerManager.enqueue(matchingJob, 'matcher'); + }) + .catch((error) => { + // Embedding failed, mark matching job as failed too + try { + db.updateJobStatus(matchingJobId, 'failed'); + } catch (dbError) { + // Log but don't throw logger.error( - 'request', - 'Failed to create inline matching job', - matchingError, - { matchingJobId }, + 'system', + 'Failed to update matching job status to failed', + dbError instanceof Error ? dbError : new Error(String(dbError)), + {}, requestId, ); } - }, - ); + }); - res - .setHeader('Location', `${PATHS.match}/jobs/${matchingJobId}`) - .status(202) - .json({ jobId: matchingJobId, status: 'pending' }); + return; } } catch (error) { // Pass error to error handler middleware diff --git a/src/competence-matcher/src/middleware/resource.ts b/src/competence-matcher/src/middleware/resource.ts index 0f3090fec..9954901d6 100644 --- a/src/competence-matcher/src/middleware/resource.ts +++ b/src/competence-matcher/src/middleware/resource.ts @@ -86,12 +86,11 @@ export function getResourceList(req: Request, res: Response, next: NextFunction) } // Helper function to handle the creation logic -export async function handleCreateResourceList( +export function handleCreateResourceList( dbName: string, resources: ResourceInput[], requestId?: string, - onWorkerExit?: (job: any, code: number, jobId: string) => void, -): Promise<{ jobId: string; status: string }> { +): { jobId: string; listId: string; status: string; promise: Promise } { let resourceIds: string[] = []; let competences: 
CompetenceInput[][] = []; @@ -157,15 +156,17 @@ export async function handleCreateResourceList( // Workaround for now // Ideally, the worker should handle the splitting as well db.updateJobStatus(jobId!, 'preprocessing'); - let job: EmbeddingJob | undefined; - splitSemantically(descriptionEmbeddingInput) + const promise = splitSemantically(descriptionEmbeddingInput) .then((tasks) => { - job = { + const job: EmbeddingJob = { jobId: jobId!, dbName: dbName, tasks, }; + + db.updateJobStatus(jobId!, 'pending'); + return workerManager.enqueue(job, 'embedder'); }) .catch((err) => { logger.warn( @@ -177,20 +178,18 @@ export async function handleCreateResourceList( }, requestId, ); - job = { + + const job: EmbeddingJob = { jobId: jobId!, dbName: dbName, tasks: descriptionEmbeddingInput, }; - }) - .finally(() => { + db.updateJobStatus(jobId!, 'pending'); - workerManager.enqueue(job!, 'embedder', { - onExit: (job: any, code: number) => onWorkerExit?.(job, code, jobId!), - }); + return workerManager.enqueue(job, 'embedder'); }); - return { jobId: jobId!, status: 'pending' }; + return { jobId: jobId!, listId: listId!, status: 'pending', promise }; } export function createResourceList(req: Request, res: Response, next: NextFunction): void { @@ -201,48 +200,36 @@ export function createResourceList(req: Request, res: Response, next: NextFuncti return; } try { - handleCreateResourceList(req.dbName!, req.body, requestId) - .then(({ jobId, status }) => { - logger.debug( - 'request', - 'Resource list creation job created', - { - jobId, - status, - resourceCount: req.body.length, - }, - requestId, - ); + const { jobId, status } = handleCreateResourceList(req.dbName!, req.body, requestId); - res - .setHeader('Location', `${PATHS.resource}/jobs/${jobId}`) - .status(202) - .json({ jobId, status }); - }) - .catch((error) => { - logger.error( - 'request', - 'Error adding resource list', - error instanceof Error ? 
error : new Error(String(error)), - { - resourceCount: req.body.length, - }, - requestId, - ); - res.status(400).json({ error: error.message || 'Invalid request body format' }); - }); + logger.debug( + 'request', + 'Resource list creation job created', + { + jobId, + status, + resourceCount: req.body.length, + }, + requestId, + ); + + res + .setHeader('Location', `${PATHS.resource}/jobs/${jobId}`) + .status(202) + .json({ jobId, status }); } catch (error) { logger.error( 'request', - 'Error processing request body', + 'Error adding resource list', error instanceof Error ? error : new Error(String(error)), { - bodyType: typeof req.body, - bodyLength: Array.isArray(req.body) ? req.body.length : 'not array', + resourceCount: req.body.length, }, requestId, ); - res.status(400).json({ error: 'Invalid request body format' }); + res + .status(400) + .json({ error: error instanceof Error ? error.message : 'Invalid request body format' }); } } diff --git a/src/competence-matcher/src/server.ts b/src/competence-matcher/src/server.ts index 5beefa6e4..f0a5bf3b0 100644 --- a/src/competence-matcher/src/server.ts +++ b/src/competence-matcher/src/server.ts @@ -15,7 +15,6 @@ import { errorHandler } from './middleware/error-handler'; import { ensureAllOllamaModelsAreAvailable } from './utils/ollama'; import { ensureAllHuggingfaceModelsAreAvailable } from './utils/huggingface'; import { CompetenceMatcherError } from './utils/errors'; -import workerManager from './worker/worker-manager'; const { port: PORT } = config; @@ -48,11 +47,6 @@ async function main() { await ensureAllOllamaModelsAreAvailable(); logger.info('server', 'All required models are available'); - - // Wait for worker pools to be ready - logger.info('server', 'Waiting for worker pools to be ready...'); - await workerManager.ready(); - logger.info('server', 'All worker pools are ready'); } catch (error) { const initError = new CompetenceMatcherError( `Failed to initialise service: ${error instanceof Error ? 
error.message : String(error)}`, diff --git a/src/competence-matcher/src/utils/types.ts b/src/competence-matcher/src/utils/types.ts index 2a708da93..6f4ef92c0 100644 --- a/src/competence-matcher/src/utils/types.ts +++ b/src/competence-matcher/src/utils/types.ts @@ -186,3 +186,10 @@ export interface TransformerPipelineOptions { model: string; options?: PretrainedModelOptions; } + +export interface JobQueueItem { + job: EmbeddingJob | MatchingJob; + resolve: (result: any) => void; + reject: (error: Error) => void; + retryCount: number; +} diff --git a/src/competence-matcher/src/utils/worker.ts b/src/competence-matcher/src/utils/worker.ts index a53e4f147..51791a23d 100644 --- a/src/competence-matcher/src/utils/worker.ts +++ b/src/competence-matcher/src/utils/worker.ts @@ -69,7 +69,7 @@ export async function withJobUpdates( // Always re-throw the error so the worker can handle it appropriately throw error; } finally { - db.close(); + // Don't close the database connection - let DBManager handle connection lifecycle // Don't close parentPort or exit process for static worker pools // Workers need to stay alive to process more jobs } @@ -108,3 +108,44 @@ export function workerLogger( parentPort.postMessage(logData); } } + +/** + * Start sending heartbeat messages to the main thread + * This should be called once when a worker starts up + * + * @param workerType - Type of worker (e.g., 'embedder', 'matcher') + * @param intervalMs - Heartbeat interval in milliseconds (default: 20000ms = 20s) + * @returns Function to stop the heartbeat + */ +export function startHeartbeat(workerType: string, intervalMs: number = 20000): () => void { + if (!parentPort) { + throw new Error('startHeartbeat can only be called from worker threads'); + } + + const { threadId } = require('worker_threads'); + + const sendHeartbeat = () => { + workerLogger('system', 'debug', `${workerType} worker sending heartbeat`, { + workerType, + threadId, + }); + + parentPort!.postMessage({ + type: 'heartbeat', 
+ workerType, + threadId, + timestamp: Date.now(), + }); + }; + + // Send initial heartbeat immediately + sendHeartbeat(); + + // Set up interval for regular heartbeats + const heartbeatInterval = setInterval(sendHeartbeat, intervalMs); + + // Return cleanup function + return () => { + clearInterval(heartbeatInterval); + }; +} diff --git a/src/competence-matcher/src/worker/embedder.ts b/src/competence-matcher/src/worker/embedder.ts index 28b70ae73..310dc096b 100644 --- a/src/competence-matcher/src/worker/embedder.ts +++ b/src/competence-matcher/src/worker/embedder.ts @@ -1,15 +1,9 @@ import { parentPort, threadId } from 'worker_threads'; import Embedding from '../tasks/embedding'; import { splitSemantically } from '../tasks/semantic-split'; -import { withJobUpdates, workerLogger } from '../utils/worker'; +import { withJobUpdates, workerLogger, startHeartbeat } from '../utils/worker'; import { EmbeddingJob } from '../utils/types'; - -// // Initialise embedding model on startup -// try { -// Embedding.getInstance(); -// } catch (error) { -// // Model already initialised -// } +import { config } from '../config'; /** * New embedder worker that stays alive and processes jobs sequentially @@ -24,47 +18,35 @@ async function ensureModelsInitialised() { try { await Embedding.getInstance(); modelsInitialised = true; - workerLogger('system', 'info', 'Embedder worker model initialised', { threadId }); + workerLogger('system', 'debug', 'Embedder worker online', { threadId }); } catch (err) { throw err; } } -// Set up health check handler immediately, before any heavy initialisation -parentPort.on('message', async (message: any) => { - // Handle health checks with highest priority - if (message?.type === 'health_check') { - workerLogger('system', 'debug', `Health check received: ${message.checkId}`, { - threadId, - checkId: message.checkId, - }); - - parentPort!.postMessage({ - type: 'health_check_response', - checkId: message.checkId, - timestamp: Date.now(), - workerType: 
'embedder', - threadId: threadId, - }); - - workerLogger('system', 'debug', `Health check response sent: ${message.checkId}`, { - threadId, - }); - return; - } +// Start heartbeat immediately +startHeartbeat('embedder', config.workerHeartbeatInterval); +// Set up job message handler +parentPort.on('message', async (message: any) => { // Handle job messages const job = message as EmbeddingJob; + workerLogger(job.jobId || 'system', 'debug', 'Embedder worker received job', { + threadId, + jobId: job.jobId, + taskCount: job.tasks?.length || 0, + }); + // ensure models are initialised (but do not run this for health_check) try { await ensureModelsInitialised(); } catch (err) { workerLogger( job.jobId || 'system', - 'error', - 'Failed to initialise models', - { threadId }, + 'debug', + 'Embedder worker failed to initialize models', + { threadId, jobId: job.jobId }, err instanceof Error ? err : new Error(String(err)), ); // Notify parent and exit or mark job failed @@ -78,7 +60,7 @@ parentPort.on('message', async (message: any) => { return; } - workerLogger(job.jobId, 'info', `Starting embedding job with ${job.tasks.length} tasks`, { + workerLogger(job.jobId, 'debug', `Starting embedding job with ${job.tasks.length} tasks`, { threadId, taskCount: job.tasks.length, }); @@ -138,7 +120,7 @@ parentPort.on('message', async (message: any) => { }); // Job completed successfully - workerLogger(job.jobId, 'info', `Embedding job completed`, { + workerLogger(job.jobId, 'debug', `Embedding job completed`, { threadId, taskCount: job.tasks.length, }); @@ -147,7 +129,7 @@ parentPort.on('message', async (message: any) => { // Just log it for worker context workerLogger( job.jobId, - 'error', + 'debug', `Embedding job failed`, { threadId, @@ -163,6 +145,6 @@ parentPort.on('message', async (message: any) => { } }); -workerLogger('system', 'info', `Embedder worker thread ready`, { +workerLogger('system', 'debug', `Embedder worker thread ready`, { threadId, }); diff --git 
a/src/competence-matcher/src/worker/matcher.ts b/src/competence-matcher/src/worker/matcher.ts index 867586938..da29def32 100644 --- a/src/competence-matcher/src/worker/matcher.ts +++ b/src/competence-matcher/src/worker/matcher.ts @@ -1,18 +1,11 @@ import { parentPort, threadId } from 'worker_threads'; import Embedding from '../tasks/embedding'; -import { withJobUpdates, workerLogger } from '../utils/worker'; +import { withJobUpdates, workerLogger, startHeartbeat } from '../utils/worker'; import { addReason } from '../tasks/reason'; import { Match, MatchingJob } from '../utils/types'; import ZeroShot, { labels } from '../tasks/semantic-zeroshot'; import CrossEncoder from '../tasks/cross-encode'; - -// // Initialise models on startup -// try { -// Embedding.getInstance(); -// ZeroShot.getInstance(); -// } catch (error) { -// // Models already initialised -// } +import { config } from '../config'; /** * New matcher worker that stays alive and processes jobs sequentially @@ -28,47 +21,36 @@ async function ensureModelsInitialised() { await Embedding.getInstance(); await ZeroShot.getInstance(); modelsInitialised = true; - workerLogger('system', 'info', 'Matcher worker models initialised', { threadId }); + workerLogger('system', 'debug', 'Matcher worker online', { threadId }); } catch (err) { // Bubble up so job handling can report the error throw err; } } -// Set up health check handler immediately, before any heavy initialisation -parentPort.on('message', async (message: any) => { - // Handle health checks with highest priority - if (message?.type === 'health_check') { - workerLogger('system', 'debug', `Health check received: ${message.checkId}`, { - threadId, - checkId: message.checkId, - }); - - parentPort!.postMessage({ - type: 'health_check_response', - checkId: message.checkId, - timestamp: Date.now(), - workerType: 'matcher', - threadId: threadId, - }); - - workerLogger('system', 'debug', `Health check response sent: ${message.checkId}`, { - threadId, - }); - 
return; - } +// Start heartbeat immediately +startHeartbeat('matcher', config.workerHeartbeatInterval); +// Set up job message handler +parentPort.on('message', async (message: any) => { // Handle job messages const job = message as MatchingJob; + workerLogger(job.jobId || 'system', 'debug', 'Matcher worker received job', { + threadId, + jobId: job.jobId, + taskCount: job.tasks?.length || 0, + listId: job.listId, + }); + try { await ensureModelsInitialised(); } catch (err) { workerLogger( job.jobId || 'system', - 'error', - 'Failed to initialise models', - { threadId }, + 'debug', + 'Matcher worker failed to initialize models', + { threadId, jobId: job.jobId }, err instanceof Error ? err : new Error(String(err)), ); // Notify parent and exit or mark job failed @@ -82,7 +64,7 @@ parentPort.on('message', async (message: any) => { return; } - workerLogger(job.jobId, 'info', `Starting matching job with ${job.tasks.length} tasks`, { + workerLogger(job.jobId, 'debug', `Starting matching job with ${job.tasks.length} tasks`, { threadId, taskCount: job.tasks.length, }); @@ -207,7 +189,7 @@ parentPort.on('message', async (message: any) => { ); // Job completed successfully - workerLogger(job.jobId, 'info', `Matching job completed`, { + workerLogger(job.jobId, 'debug', `Matching job completed`, { threadId, taskCount: job.tasks.length, }); @@ -216,7 +198,7 @@ parentPort.on('message', async (message: any) => { // Just log it for worker context workerLogger( job.jobId, - 'error', + 'debug', `Matching job failed`, { threadId, @@ -232,6 +214,6 @@ parentPort.on('message', async (message: any) => { } }); -workerLogger('system', 'info', `Matcher worker thread ready`, { +workerLogger('system', 'debug', `Matcher worker thread ready`, { threadId, }); diff --git a/src/competence-matcher/src/worker/worker-manager.ts b/src/competence-matcher/src/worker/worker-manager.ts index 7406b3741..b036e1696 100644 --- a/src/competence-matcher/src/worker/worker-manager.ts +++ 
b/src/competence-matcher/src/worker/worker-manager.ts @@ -1,941 +1,433 @@ import { Worker } from 'worker_threads'; import { config } from '../config'; import { createWorker } from '../utils/worker'; -import { EmbeddingJob, MatchingJob, workerTypes, Match } from '../utils/types'; -import { WorkerError, DatabaseError, ReasoningError } from '../utils/errors'; +import { EmbeddingJob, JobQueueItem, MatchingJob, workerTypes } from '../utils/types'; +import { WorkerError } from '../utils/errors'; import { getLogger } from '../utils/logger'; import { addReason } from '../tasks/reason'; import { getDB } from '../utils/db'; -const { - embeddingWorkers, - matchingWorkers, - workerHealthCheckTimeout, - maxWorkerRetries, - workerRetryWindow, - systemStartupTimeout, - maxJobTime, -} = config; +const { embeddingWorkers, matchingWorkers, workerHeartbeatInterval, workerDeathTimeout } = config; const logger = getLogger(); -// Job queue interface for task-specific queues -interface JobQueueItem { - job: EmbeddingJob | MatchingJob; - options?: { - onOnline?: (job: any) => void; - onExit?: (job: any, code: number) => void; - onError?: (job: any, error: Error) => void; - onMessage?: (job: any, message: any) => void; - }; -} - -// Worker pool for a specific task type +/** + * WorkerPool - Manages workers for a specific task type + */ class WorkerPool { private workers: Worker[] = []; - private queue: JobQueueItem[] = []; + private availableWorkers: Set = new Set(); + private busyWorkers: Map = new Map(); + private activeJobs: Map = new Map(); // Track jobs being processed + private jobQueue: JobQueueItem[] = []; + private workerDeathTimers: Map = new Map(); + private workersBeingReplaced: Set = new Set(); // Prevent double replacement using threadId private readonly workerType: workerTypes; private readonly poolSize: number; - private availableWorkers: Set = new Set(); - private busyWorkers: Map = new Map(); // Maps worker to current jobId - private pendingHealthChecks: Map = new Map(); 
// Track pending health checks - private beingReplaced: Set = new Set(); // Track workers being replaced to prevent double replacement - private jobTimeouts: Map = new Map(); // Track job timeouts - - // Retry tracking - private failureCount: number = 0; // Total failures for this worker type - private lastFailureTime: number = 0; // Timestamp of last failure - private consecutiveFailures: number = 0; // Consecutive failures without recovery - - // Health check failure tracking - private consecutiveHealthCheckFailures: number = 0; // Consecutive health check failures - private poolBroken: boolean = false; // Pool marked as broken due to persistent failures constructor(workerType: workerTypes, poolSize: number) { this.workerType = workerType; this.poolSize = poolSize; - this.initialiseWorkers(); - } - - /** - * Initialise static worker pool - workers stay alive and process jobs sequentially - */ - private initialiseWorkers() { - for (let i = 0; i < this.poolSize; i++) { - this.createAndRegisterWorker(); - } - logger.debug('system', `[WorkerPool] Initialised ${this.poolSize} ${this.workerType} workers`); - } + logger.info('worker', `Initializing ${workerType} pool with ${poolSize} workers`, { + workerType, + poolSize, + }); - /** - * Create a new worker and register it in the pool - */ - private createAndRegisterWorker() { - // Don't create new workers if pool is broken - if (this.poolBroken) { - logger.debug( - 'system', - `[WorkerPool] Not creating new ${this.workerType} worker - pool is marked as broken`, - ); - return; - } + this.initialiseWorkers(); - // Check if we already have enough workers (including those pending health checks) - if (this.workers.length >= this.poolSize) { - logger.debug( - 'system', - `[WorkerPool] Not creating new ${this.workerType} worker - pool already has ${this.workers.length}/${this.poolSize} workers`, - ); - return; - } + logger.debug('worker', `${workerType} pool initialization completed`, { + workerType, + totalWorkers: 
this.workers.length, + availableWorkers: this.availableWorkers.size, + busyWorkers: this.busyWorkers.size, + }); + } - let worker: Worker; - try { - worker = createWorker(this.workerType); - } catch (error) { - const workerError = new WorkerError( - this.workerType, - 'unknown', - error instanceof Error ? error : new Error(String(error)), - ); + private initialiseWorkers(): void { + logger.debug('worker', `Initializing ${this.poolSize} ${this.workerType} workers`, { + workerType: this.workerType, + poolSize: this.poolSize, + }); - logger.workerError('worker_pool_creation_failure', workerError, { + for (let i = 0; i < this.poolSize; i++) { + logger.debug('worker', `Creating ${this.workerType} worker ${i + 1}/${this.poolSize}`, { workerType: this.workerType, - poolSize: this.poolSize, + workerIndex: i + 1, + totalToCreate: this.poolSize, }); - return; + this.createWorker(); } - this.workers.push(worker); - // DON'T add to availableWorkers yet - only after health check passes - - // Perform health check after worker is online - worker.once('online', () => { - this.performHealthCheck(worker); - }); - - // Handle worker lifecycle events - worker.once('online', () => { - logger.debug('system', `[WorkerPool] ${this.workerType} worker ${worker.threadId} is online`); + logger.debug('worker', `Finished initializing ${this.workerType} workers`, { + workerType: this.workerType, + workersCreated: this.workers.length, + expectedWorkers: this.poolSize, }); + } - worker.on('error', (err) => { - const jobId = this.busyWorkers.get(worker) || 'unknown'; - const workerError = new WorkerError( - this.workerType, - jobId, - err instanceof Error ? 
err : new Error(String(err)), - ); - - // Track failure and determine logging severity - const { shouldLogAsError, retryCount } = this.trackWorkerFailure(); - - const errorData = { - workerType: this.workerType, - threadId: worker.threadId, - jobId, - retryCount, - maxRetries: maxWorkerRetries, - }; - - if (shouldLogAsError) { - logger.workerError('worker_pool_runtime_error', workerError, errorData); - } else { - logger.warn( - 'worker', - `Worker runtime error (attempt ${retryCount}/${maxWorkerRetries}): ${err.message}`, - errorData, - ); - } + private createWorker(): void { + const worker = createWorker(this.workerType); + this.workers.push(worker); - // Mark worker as available again and try to process next job - this.markWorkerAvailable(worker); - this.processNextJob(); + logger.debug('worker', `Created new ${this.workerType} worker`, { + workerType: this.workerType, + threadId: worker.threadId, + totalWorkers: this.workers.length, + poolSize: this.poolSize, + availableWorkers: this.availableWorkers.size, + busyWorkers: this.busyWorkers.size, }); - // Handle unexpected worker exits - restart the worker - worker.once('exit', (code) => { - const jobId = this.busyWorkers.get(worker) || 'unknown'; - - logger.debug( - 'system', - `[WorkerPool] ${this.workerType} worker ${worker.threadId} exited with code ${code}`, - ); + // Set initial death timer + const deathTimer = setTimeout(() => this.killWorker(worker), workerDeathTimeout); + this.workerDeathTimers.set(worker, deathTimer); - // Check if this worker is already being replaced to prevent double replacement - if (this.beingReplaced.has(worker)) { - logger.debug( - 'system', - `[WorkerPool] Worker ${worker.threadId} already being replaced, skipping duplicate replacement`, - ); - this.removeWorkerFromPool(worker); - return; + worker.on('message', (message: any) => { + if (message.type === 'heartbeat') { + this.handleHeartbeat(worker); + } else if (message.type === 'log') { + // Handle log messages from workers + 
this.handleWorkerLog(message); } - - // Remove from all tracking sets/maps and create replacement - this.removeWorkerFromPool(worker); - this.createAndRegisterWorker(); }); - } - /** - * Add a job to this pool's queue - */ - public enqueue(job: EmbeddingJob | MatchingJob, options?: JobQueueItem['options']) { - this.queue.push({ job, options }); - - logger.debug( - 'system', - `[WorkerPool] Enqueued ${this.workerType} job ${job.jobId} (queue: ${this.queue.length}, available: ${this.availableWorkers.size})`, - ); - - this.processNextJob(); + worker.on('error', (error) => this.handleWorkerFailure(worker, error)); + worker.once('exit', () => this.handleWorkerFailure(worker, new Error('Worker exited'))); } - /** - * Process the next job in queue if workers are available - */ - private processNextJob() { - if (this.queue.length === 0 || this.availableWorkers.size === 0) { - return; - } - - const worker = this.availableWorkers.values().next().value as Worker; - const queueItem = this.queue.shift()!; - - this.assignJobToWorker(worker, queueItem); - } - - /** - * Assign a specific job to a specific worker - */ - private assignJobToWorker(worker: Worker, { job, options }: JobQueueItem) { - // Mark worker as busy - this.availableWorkers.delete(worker); - this.busyWorkers.set(worker, job.jobId); - - logger.debug( - 'system', - `[WorkerPool] Assigning ${this.workerType} job ${job.jobId} to worker ${worker.threadId}`, - ); - - // Set up job timeout - const jobTimeout = setTimeout(() => { - this.handleJobTimeout(worker, job); - }, maxJobTime); - this.jobTimeouts.set(worker, jobTimeout); - - // Set up message handling for this specific job - const messageHandler = (message: any) => { - try { - switch (message.type) { - case 'status': - logger.debug( - 'system', - `[WorkerPool] Worker ${worker.threadId} for job ${message.jobId || job.jobId} status: ${message.status}`, - ); - break; - case 'error': - logger.workerError( - 'worker_reported_error', - new WorkerError( - 
this.workerType, - message.jobId || job.jobId, - new Error(message.error), - ), - { - workerType: this.workerType, - threadId: worker.threadId, - jobId: message.jobId || job.jobId, - reportedError: message.error, - }, - ); - break; - case 'log': - // Forward worker logs to main logger - try { - const logType = message.logType || 'worker'; - switch (message.level) { - case 'debug': - logger.debug(logType, message.message, message.data); - break; - case 'info': - logger.info(logType, message.message, message.data); - break; - case 'warn': - logger.warn(logType, message.message, message.data); - break; - case 'error': - const error = message.error ? new Error(message.error.message) : undefined; - if (error && message.error.stack) error.stack = message.error.stack; - logger.error(logType, message.message, error, message.data); - break; - } - } catch (err) { - logger.error( - 'system', - 'Failed to forward worker log', - err instanceof Error ? err : new Error(String(err)), - ); - } - break; - case 'job_completed': - // Clear job timeout since job completed - this.clearJobTimeout(worker); - - // Job is done, mark worker as available and process next job - this.markWorkerAvailable(worker); - this.processNextJob(); - options?.onExit?.(job, 0); - break; - case 'job': - // Handle special job messages (like reasoning requests from matching workers) - if (message.job === 'reason') { - // Handle reasoning asynchronously without blocking the worker - handleReasoning(job, message).catch((error: any) => { - logger.workerError( - 'reasoning_handler_async_failure', - error instanceof Error ? error : new Error(String(error)), - { jobId: job.jobId }, - ); - }); - } - break; - } - - options?.onMessage?.(job, message); - } catch (error) { - const messageHandlingError = new WorkerError( - this.workerType, - job.jobId, - error instanceof Error ? 
error : new Error(String(error)), - ); - - logger.workerError('worker_message_handling_error', messageHandlingError, { - workerType: this.workerType, - threadId: worker.threadId, - jobId: job.jobId, - messageType: message.type, - }); - } - }; - - worker.on('message', messageHandler); - - // Send the job to the worker - try { - worker.postMessage(job); - options?.onOnline?.(job); - } catch (error) { - // Clear timeout since job failed to start - this.clearJobTimeout(worker); + private handleHeartbeat(worker: Worker): void { + logger.debug('worker', `Heartbeat received from ${this.workerType} worker`, { + workerType: this.workerType, + threadId: worker.threadId, + isBusy: this.busyWorkers.has(worker), + }); - const messageError = new WorkerError( - this.workerType, - job.jobId, - error instanceof Error ? error : new Error(String(error)), - ); + // Reset death timer + const existingTimer = this.workerDeathTimers.get(worker); + if (existingTimer) clearTimeout(existingTimer); - logger.workerError('worker_message_send_failure', messageError, { - workerType: this.workerType, - threadId: worker.threadId, - jobId: job.jobId, - }); + const newTimer = setTimeout(() => this.killWorker(worker), workerDeathTimeout); + this.workerDeathTimers.set(worker, newTimer); - // Remove the message handler and mark worker as available - worker.off('message', messageHandler); - this.markWorkerAvailable(worker); + // Mark available if not busy + if (!this.busyWorkers.has(worker)) { + this.availableWorkers.add(worker); this.processNextJob(); - options?.onError?.(job, messageError); } } - /** - * Handle job timeout for a worker - */ - private handleJobTimeout(worker: Worker, job: EmbeddingJob | MatchingJob) { - const jobId = job.jobId; - - logger.workerError( - 'job_timeout', - new WorkerError(this.workerType, jobId, new Error(`Job timed out after ${maxJobTime}ms`)), - { - workerType: this.workerType, - threadId: worker.threadId, - jobId, - timeout: maxJobTime, - }, - ); - - // Clear the 
timeout from tracking - this.clearJobTimeout(worker); - - // Update job status in database - try { - const db = getDB(job.dbName); - db.updateJobStatus(jobId, 'failed'); - db.close(); - } catch (error) { - logger.error( - 'system', - `Failed to update job status for timed out job ${jobId}`, - error instanceof Error ? error : new Error(String(error)), + private handleWorkerLog(message: any): void { + // Forward worker logs to the main logger + const { level, logType, message: logMessage, data, error } = message; + + if (error) { + const reconstructedError = new Error(error.message); + reconstructedError.stack = error.stack; + reconstructedError.name = error.name; + logger[level as 'debug' | 'info' | 'warn' | 'error']( + logType, + logMessage, + reconstructedError, + data, ); + } else { + logger[level as 'debug' | 'info' | 'warn' | 'error'](logType, logMessage, data); } + } - // Mark worker as being replaced to prevent double replacement - this.beingReplaced.add(worker); - - // Terminate the unresponsive worker - try { - worker.terminate(); - logger.debug( - 'system', - `[WorkerPool] Terminated ${this.workerType} worker ${worker.threadId} due to job timeout`, - ); - } catch (error) { + private killWorker(worker: Worker): void { + // Prevent double replacement if worker is already being replaced + if (this.workersBeingReplaced.has(worker.threadId)) { logger.debug( - 'system', - `[WorkerPool] Failed to terminate timed-out worker ${worker.threadId}:`, - error, + 'worker', + `Skipping replacement for ${this.workerType} worker - already being replaced`, + { + workerType: this.workerType, + threadId: worker.threadId, + }, ); + return; } - // Remove worker and create replacement - this.removeWorkerFromPool(worker); - this.createAndRegisterWorker(); - } - - /** - * Clear job timeout for a worker - */ - private clearJobTimeout(worker: Worker) { - const timeout = this.jobTimeouts.get(worker); - if (timeout) { - clearTimeout(timeout); - this.jobTimeouts.delete(worker); - } - } 
- - /** - * Perform health check on a worker to ensure it's responsive - */ - private performHealthCheck(worker: Worker) { - const healthCheckId = `health_check_${Date.now()}_${worker.threadId}`; - const timeout = workerHealthCheckTimeout; - - logger.debug( - 'system', - `[WorkerPool] Performing health check on ${this.workerType} worker ${worker.threadId}`, - ); - - // Set up timeout for health check - const healthCheckTimeout = setTimeout(() => { - // Track health check failure and check if pool should be marked as broken - const poolBroken = this.trackHealthCheckFailure(); - - const error = new WorkerError( - this.workerType, - healthCheckId, - new Error('Health check timeout'), - ); - const errorData = { - workerType: this.workerType, - threadId: worker.threadId, - timeout, - consecutiveHealthCheckFailures: this.consecutiveHealthCheckFailures, - maxHealthCheckFailures: 5, - }; - - if (poolBroken) { - logger.error( - 'worker', - `Worker health check timeout - pool marked as broken`, - undefined, - errorData, - ); - } else { - logger.warn( - 'worker', - `Worker health check timeout (failure ${this.consecutiveHealthCheckFailures}/5)`, - errorData, - ); - } - - // Clean up pending health check - this.pendingHealthChecks.delete(worker); - - // Mark worker as being replaced to prevent double replacement - this.beingReplaced.add(worker); - - // Terminate unresponsive worker explicitly - try { - worker.terminate(); - logger.debug( - 'system', - `[WorkerPool] Terminated unresponsive ${this.workerType} worker ${worker.threadId}`, - ); - } catch (error) { - logger.debug( - 'system', - `[WorkerPool] Failed to terminate worker ${worker.threadId}:`, - error, - ); - } - - // Remove unresponsive worker and create a replacement only if pool is not broken - this.removeWorkerFromPool(worker); - if (!poolBroken) { - this.createAndRegisterWorker(); - } - }, timeout); - - // Store the timeout so we can clear it if worker responds - this.pendingHealthChecks.set(worker, 
healthCheckTimeout); - - // Listen for health check response - const healthCheckHandler = (message: any) => { - if (message?.type === 'health_check_response' && message?.checkId === healthCheckId) { - // Clear the timeout since worker responded - const timeout = this.pendingHealthChecks.get(worker); - if (timeout) { - clearTimeout(timeout); - this.pendingHealthChecks.delete(worker); - } - - worker.off('message', healthCheckHandler); - - // NOW mark worker as available since it passed health check - this.availableWorkers.add(worker); + this.workersBeingReplaced.add(worker.threadId); - // Reset failure tracking on successful recovery - this.resetFailureTracking(); - this.resetHealthCheckFailures(); + logger.warn('worker', `Killing unresponsive ${this.workerType} worker (heartbeat timeout)`, { + workerType: this.workerType, + threadId: worker.threadId, + reason: 'heartbeat_timeout', + totalWorkersBefore: this.workers.length, + availableWorkersBefore: this.availableWorkers.size, + busyWorkersBefore: this.busyWorkers.size, + }); - logger.debug( - 'system', - `[WorkerPool] ${this.workerType} worker ${worker.threadId} passed health check and is now available`, - ); + this.removeWorker(worker); - // Process any queued jobs now that we have an available worker - this.processNextJob(); - } - }; + logger.info('worker', `Creating replacement ${this.workerType} worker after timeout`, { + workerType: this.workerType, + killedThreadId: worker.threadId, + totalWorkersAfterRemoval: this.workers.length, + poolSize: this.poolSize, + }); - worker.on('message', healthCheckHandler); + this.createWorker(); - // Send health check request - try { - worker.postMessage({ - type: 'health_check', - checkId: healthCheckId, - timestamp: Date.now(), - }); - } catch (error) { - // Clear the timeout and pending health check - const timeout = this.pendingHealthChecks.get(worker); - if (timeout) { - clearTimeout(timeout); - this.pendingHealthChecks.delete(worker); - } + logger.debug('worker', 
`Replacement ${this.workerType} worker creation completed`, { + workerType: this.workerType, + totalWorkersAfterReplacement: this.workers.length, + availableWorkersAfterReplacement: this.availableWorkers.size, + busyWorkersAfterReplacement: this.busyWorkers.size, + poolSize: this.poolSize, + }); - worker.off('message', healthCheckHandler); + this.workersBeingReplaced.delete(worker.threadId); + } - logger.workerError( - 'worker_health_check_send_failure', - new WorkerError( - this.workerType, - healthCheckId, - error instanceof Error ? error : new Error(String(error)), - ), + private handleWorkerFailure(worker: Worker, error: Error): void { + // Prevent double replacement if worker is already being replaced + if (this.workersBeingReplaced.has(worker.threadId)) { + logger.debug( + 'worker', + `Skipping replacement for failed ${this.workerType} worker - already being replaced`, { workerType: this.workerType, threadId: worker.threadId, + error: error.message, }, ); - - // Remove faulty worker and create a replacement - this.removeWorkerFromPool(worker); - this.createAndRegisterWorker(); + return; } - } - - /** - * Remove a worker from all tracking structures - */ - private removeWorkerFromPool(worker: Worker) { - this.availableWorkers.delete(worker); - this.busyWorkers.delete(worker); - this.beingReplaced.delete(worker); - // Clean up any pending health check - const healthCheckTimeout = this.pendingHealthChecks.get(worker); - if (healthCheckTimeout) { - clearTimeout(healthCheckTimeout); - this.pendingHealthChecks.delete(worker); - } + this.workersBeingReplaced.add(worker.threadId); - // Clean up any job timeout - this.clearJobTimeout(worker); + logger.error('worker', `${this.workerType} worker failed, replacing worker`, { + // @ts-ignore + workerType: this.workerType, + threadId: worker.threadId, + error: error.message, + stack: error.stack, + totalWorkersBefore: this.workers.length, + availableWorkersBefore: this.availableWorkers.size, + busyWorkersBefore: 
this.busyWorkers.size, + hasActiveJob: this.activeJobs.has(worker), + }); - const index = this.workers.indexOf(worker); - if (index > -1) { - this.workers.splice(index, 1); + const activeJob = this.activeJobs.get(worker); + if (activeJob) { + // Recover the job that was being processed + this.activeJobs.delete(worker); + logger.warn('worker', `Recovering job from failed ${this.workerType} worker`, { + workerType: this.workerType, + threadId: worker.threadId, + jobId: activeJob.job.jobId, + retryCount: activeJob.retryCount, + }); + this.handleJobFailure(activeJob, error); } - try { - worker.terminate(); - } catch (error) { - // Ignore termination errors - } - } + this.removeWorker(worker); - /** - * Track a health check failure and determine if pool should be marked as broken - */ - private trackHealthCheckFailure(): boolean { - this.consecutiveHealthCheckFailures++; + logger.info('worker', `Creating replacement ${this.workerType} worker after failure`, { + workerType: this.workerType, + failedThreadId: worker.threadId, + totalWorkersAfterRemoval: this.workers.length, + poolSize: this.poolSize, + }); - // Mark pool as broken after 5 consecutive health check failures - if (this.consecutiveHealthCheckFailures >= 5) { - this.poolBroken = true; - logger.error( - 'worker', - `Pool ${this.workerType} marked as broken after ${this.consecutiveHealthCheckFailures} consecutive health check failures`, - undefined, - { - workerType: this.workerType, - consecutiveFailures: this.consecutiveHealthCheckFailures, - }, - ); - return true; - } + this.createWorker(); - return false; - } + logger.debug( + 'worker', + `Replacement ${this.workerType} worker creation completed after failure`, + { + workerType: this.workerType, + totalWorkersAfterReplacement: this.workers.length, + availableWorkersAfterReplacement: this.availableWorkers.size, + busyWorkersAfterReplacement: this.busyWorkers.size, + poolSize: this.poolSize, + }, + ); - /** - * Reset health check failure tracking on successful 
health check - */ - private resetHealthCheckFailures() { - this.consecutiveHealthCheckFailures = 0; - this.poolBroken = false; + this.workersBeingReplaced.delete(worker.threadId); } - /** - * Check if this pool is broken due to persistent health check failures - */ - public isBroken(): boolean { - return this.poolBroken; - } + private removeWorker(worker: Worker): void { + logger.debug('worker', `Removing ${this.workerType} worker from pool`, { + workerType: this.workerType, + threadId: worker.threadId, + wasAvailable: this.availableWorkers.has(worker), + wasBusy: this.busyWorkers.has(worker), + hadActiveJob: this.activeJobs.has(worker), + totalWorkersBefore: this.workers.length, + }); - /** - * Track a worker failure and determine appropriate logging level - */ - private trackWorkerFailure(): { shouldLogAsError: boolean; retryCount: number } { - const now = Date.now(); + this.availableWorkers.delete(worker); + this.busyWorkers.delete(worker); + this.activeJobs.delete(worker); // Clean up active job tracking - // Reset consecutive failures if enough time has passed since last failure - if (now - this.lastFailureTime > workerRetryWindow) { - this.consecutiveFailures = 0; + const timer = this.workerDeathTimers.get(worker); + if (timer) { + clearTimeout(timer); + this.workerDeathTimers.delete(worker); } - this.failureCount++; - this.consecutiveFailures++; - this.lastFailureTime = now; - - // Log as ERROR only if we've exceeded the max retries - const shouldLogAsError = this.consecutiveFailures > maxWorkerRetries; - - return { - shouldLogAsError, - retryCount: this.consecutiveFailures, - }; - } + const index = this.workers.indexOf(worker); + if (index > -1) this.workers.splice(index, 1); - /** - * Reset failure tracking when workers recover successfully - */ - private resetFailureTracking() { - this.consecutiveFailures = 0; - } + // Remove all event listeners to prevent exit event from firing + worker.removeAllListeners(); - /** - * Mark a worker as available for new 
jobs - */ - private markWorkerAvailable(worker: Worker) { - this.busyWorkers.delete(worker); - this.availableWorkers.add(worker); + // Clean up replacement tracking + this.workersBeingReplaced.delete(worker.threadId); - // Clear any job timeout since job is completed - this.clearJobTimeout(worker); + worker.terminate(); - // Reset failure tracking on successful job completion - this.resetFailureTracking(); + logger.debug('worker', `Removed ${this.workerType} worker from pool`, { + workerType: this.workerType, + threadId: worker.threadId, + totalWorkersAfter: this.workers.length, + availableWorkersAfter: this.availableWorkers.size, + busyWorkersAfter: this.busyWorkers.size, + }); } - /** - * Get pool statistics - */ - public getStats() { - return { + public async executeJob(job: EmbeddingJob | MatchingJob): Promise { + logger.info('worker', `Received ${this.workerType} job for queuing`, { workerType: this.workerType, - totalWorkers: this.workers.length, + jobId: job.jobId, + queueLength: this.jobQueue.length, availableWorkers: this.availableWorkers.size, - busyWorkers: this.busyWorkers.size, - pendingHealthChecks: this.pendingHealthChecks.size, - queuedJobs: this.queue.length, - consecutiveHealthCheckFailures: this.consecutiveHealthCheckFailures, - poolBroken: this.poolBroken, - }; - } - - /** - * Shutdown all workers in this pool - */ - public async shutdown() { - logger.debug('system', `[WorkerPool] Shutting down ${this.workerType} worker pool`); - - const shutdownPromises = this.workers.map((worker) => { - return new Promise((resolve) => { - worker.once('exit', () => resolve()); - worker.terminate(); - }); }); - await Promise.all(shutdownPromises); - - this.workers.length = 0; - this.availableWorkers.clear(); - this.busyWorkers.clear(); - this.queue.length = 0; + return new Promise((resolve, reject) => { + this.jobQueue.push({ job, resolve, reject, retryCount: 0 }); + this.processNextJob(); + }); } -} -/** - * New WorkerManager with static worker pools per task 
type - */ -class WorkerManager { - private embeddingPool: WorkerPool; - private matchingPool: WorkerPool; - private readyPromise: Promise; - private isReady: boolean = false; + private processing = false; - constructor() { - // Initialise static worker pools - this.embeddingPool = new WorkerPool('embedder', embeddingWorkers); - this.matchingPool = new WorkerPool('matcher', matchingWorkers); - - logger.debug( - 'system', - `[WorkerManager] Initialised with ${embeddingWorkers} embedding workers and ${matchingWorkers} matching workers`, - ); - - // Create ready promise - this.readyPromise = this.waitForWorkersReady(); - } + private processNextJob(): void { + if (this.processing) return; // Prevent race conditions + if (this.jobQueue.length === 0 || this.availableWorkers.size === 0) return; - /** - * Wait for all worker pools to have at least one available worker - */ - private async waitForWorkersReady(): Promise { - logger.debug('system', '[WorkerManager] Waiting for worker pools to become ready...'); - - const maxWaitTime = systemStartupTimeout; - const checkInterval = 500; // Check every 500ms - const startTime = Date.now(); + this.processing = true; + const worker = this.availableWorkers.values().next().value as Worker; + const queueItem = this.jobQueue.shift()!; - return new Promise((resolve, reject) => { - const checkReady = () => { - const embeddingStats = this.embeddingPool.getStats(); - const matchingStats = this.matchingPool.getStats(); - - // Check if any pools are broken and fail fast - if (embeddingStats.poolBroken || matchingStats.poolBroken) { - const brokenPools = []; - if (embeddingStats.poolBroken) brokenPools.push('embedding'); - if (matchingStats.poolBroken) brokenPools.push('matching'); - - reject( - new Error( - `Worker pools failed to initialise due to persistent health check failures: ${brokenPools.join(', ')}`, - ), - ); - return; - } - - const embeddingReady = embeddingStats.availableWorkers > 0; - const matchingReady = 
matchingStats.availableWorkers > 0; - - if (embeddingReady && matchingReady) { - this.isReady = true; - logger.debug( - 'system', - `[WorkerManager] All worker pools ready: - - Embedding workers: ${embeddingStats.totalWorkers} total, ${embeddingStats.availableWorkers} available - - Matching workers: ${matchingStats.totalWorkers} total, ${matchingStats.availableWorkers} available`, - ); - resolve(); - } else if (Date.now() - startTime > maxWaitTime) { - reject(new Error('Timeout waiting for worker pools to become ready')); - } else { - logger.debug( - 'system', - `[WorkerManager] Waiting for workers... Embedding: ${embeddingReady ? '✓' : '✗'}, Matching: ${matchingReady ? '✓' : '✗'}`, - ); - setTimeout(checkReady, checkInterval); - } - }; - - // Start checking after a short delay to allow workers to initialise - setTimeout(checkReady, 1_000); + logger.debug('worker', `Processing next job in queue`, { + workerType: this.workerType, + queueLength: this.jobQueue.length, + availableWorkers: this.availableWorkers.size, }); - } - /** - * Promise that resolves when all worker pools are ready - */ - public ready(): Promise { - return this.readyPromise; + this.assignJobToWorker(worker, queueItem); + this.processing = false; } - /** - * Check if worker manager is ready (synchronous) - */ - public isWorkerManagerReady(): boolean { - return this.isReady; - } + private assignJobToWorker(worker: Worker, queueItem: JobQueueItem): void { + const { job, resolve, reject, retryCount } = queueItem; + this.availableWorkers.delete(worker); + this.busyWorkers.set(worker, job.jobId); + this.activeJobs.set(worker, queueItem); // Track the active job - /** - * Perform initial health check on all worker pools - * @deprecated Use ready() promise instead - */ - private performInitialHealthCheck() { - logger.debug('system', '[WorkerManager] Performing initial health check on all worker pools'); + logger.info('worker', `Starting ${this.workerType} job on worker`, { + workerType: this.workerType, + 
jobId: job.jobId, + threadId: worker.threadId, + retryCount, + queueLength: this.jobQueue.length, + }); - const embeddingStats = this.embeddingPool.getStats(); - const matchingStats = this.matchingPool.getStats(); + const messageHandler = (message: any) => { + if (message.type === 'job_completed' && message.jobId === job.jobId) { + worker.removeListener('message', messageHandler); + this.activeJobs.delete(worker); // Remove from active tracking + this.markWorkerAvailable(worker); - logger.debug( - 'system', - `[WorkerManager] Health check complete: - - Embedding workers: ${embeddingStats.totalWorkers} total, ${embeddingStats.availableWorkers} available, ${embeddingStats.pendingHealthChecks} pending health checks - - Matching workers: ${matchingStats.totalWorkers} total, ${matchingStats.availableWorkers} available, ${matchingStats.pendingHealthChecks} pending health checks`, - ); + logger.info('worker', `${this.workerType} job completed successfully`, { + workerType: this.workerType, + jobId: job.jobId, + threadId: worker.threadId, + }); - if (embeddingStats.totalWorkers === 0 || matchingStats.totalWorkers === 0) { - logger.error('system', '[WorkerManager] WARNING: Some worker pools have no active workers!'); - } - } + resolve(message.result || 'Job completed'); + this.processNextJob(); + } else if (message.type === 'error' && message.jobId === job.jobId) { + worker.removeListener('message', messageHandler); + this.activeJobs.delete(worker); // Remove from active tracking + this.markWorkerAvailable(worker); - /** - * Enqueue an embedding job - */ - public enqueueEmbedding(job: EmbeddingJob, options?: JobQueueItem['options']) { - this.embeddingPool.enqueue(job, options); - } + logger.info('worker', `${this.workerType} job failed, handling failure`, { + workerType: this.workerType, + jobId: job.jobId, + threadId: worker.threadId, + error: message.error, + }); - /** - * Enqueue a matching job - */ - public enqueueMatching(job: MatchingJob, options?: 
JobQueueItem['options']) { - this.matchingPool.enqueue(job, options); - } + this.handleJobFailure(queueItem, new Error(message.error)); + this.processNextJob(); + } else if (message.type === 'job' && message.job === 'reason') { + this.handleReasoning(job, message); + } + }; - /** - * Generic enqueue method that routes jobs based on type - */ - public enqueue(job: any, workerScript: workerTypes, options?: JobQueueItem['options']) { - if (workerScript === 'embedder') { - this.enqueueEmbedding(job as EmbeddingJob, options); - } else if (workerScript === 'matcher') { - this.enqueueMatching(job as MatchingJob, options); - } else { - throw new Error(`Unknown worker type: ${workerScript}`); - } + worker.on('message', messageHandler); + worker.postMessage(job); } - /** - * Get statistics for all worker pools - */ - public getStats() { - return { - embedding: this.embeddingPool.getStats(), - matching: this.matchingPool.getStats(), - }; + private markWorkerAvailable(worker: Worker): void { + this.busyWorkers.delete(worker); + this.availableWorkers.add(worker); } - /** - * Shutdown all worker pools - */ - public async shutdown() { - logger.debug('system', '[WorkerManager] Shutting down all worker pools'); + private handleJobFailure(queueItem: JobQueueItem, error: Error): void { + const { job, resolve, reject, retryCount } = queueItem; - await Promise.all([this.embeddingPool.shutdown(), this.matchingPool.shutdown()]); - } -} + if (retryCount < 2) { + // Requeue job with incremented retry count + logger.info('worker', `Retrying ${this.workerType} job`, { + workerType: this.workerType, + jobId: job.jobId, + retryCount: retryCount + 1, + maxRetries: 2, + error: error.message, + }); -/** - * Handle reasoning requests from matching workers - * This maintains the same functionality as the old system - */ -async function handleReasoning(job: any, message: any) { - const jobId = job.jobId || 'unknown'; + this.jobQueue.unshift({ + job, + resolve, + reject, + retryCount: retryCount + 1, 
+ }); + } else { + // Final failure after 2 retries + logger.error( + 'worker', + `${this.workerType} job failed permanently after max retries`, + new WorkerError(this.workerType, job.jobId, error), + ); - try { - logger.debug('system', `[WorkerManager] Processing reasoning for job ${jobId}`); + reject(new WorkerError(this.workerType, job.jobId, error)); + } + } + private async handleReasoning(job: any, message: any): Promise { const finalMatches = []; - // Add reasoning before saving in DB for (const [task, matches] of Object.entries(message.workload)) { try { - const taskMatches = await addReason< - Match & { - taskId: string; - taskText: string; - type: 'name' | 'description' | 'proficiencyLevel'; - alignment: 'contradicting' | 'neutral' | 'aligning'; - } - >( - matches as (Match & { - taskId: string; - taskText: string; - type: 'name' | 'description' | 'proficiencyLevel'; - alignment: 'contradicting' | 'neutral' | 'aligning'; - })[], - task, - ); + const taskMatches = await addReason(matches as any[], task); finalMatches.push(...taskMatches); } catch (error) { - const reasoningError = new ReasoningError( - (matches as any[]).length, - error instanceof Error ? error : new Error(String(error)), - ); - - logger.workerError('reasoning_task_failure', reasoningError, { - jobId, - task: task.substring(0, 100) + (task.length > 100 ? '...' : ''), - matchCount: (matches as any[]).length, - }); - - // Continue with original matches without reasoning finalMatches.push(...(matches as any[])); } } - // Save results in database - let db; - try { - db = getDB(job.dbName); - } catch (error) { - throw new DatabaseError('getDB', error instanceof Error ? 
error : new Error(String(error))); - } - + const db = getDB(job.dbName); for (const match of finalMatches) { try { db.addMatchResult({ - jobId: job.jobId, + jobId: match.jobId, taskId: match.taskId, taskText: match.taskText, competenceId: match.competenceId, @@ -943,53 +435,83 @@ async function handleReasoning(job: any, message: any) { distance: match.distance, text: match.text, type: match.type, - reason: match.reason, alignment: match.alignment, + reason: match.reason, }); } catch (error) { - throw new DatabaseError( - 'addMatchResult', - error instanceof Error ? error : new Error(String(error)), - ); + // Continue on individual match save failure } } - // Update job status to completed try { db.updateJobStatus(job.jobId, 'completed'); - - logger.debug( - 'system', - `[WorkerManager] Job ${jobId} completed successfully with ${finalMatches.length} matches`, - ); } catch (error) { - throw new DatabaseError( - 'updateJobStatus', - error instanceof Error ? error : new Error(String(error)), - ); + // Log but don't fail } - } catch (error) { - logger.workerError( - 'reasoning_handler_failure', - error instanceof Error ? error : new Error(String(error)), - { jobId }, - ); + } - // Try to mark job as failed - try { - const db = getDB(job.dbName); - db.updateJobStatus(job.jobId, 'failed'); - } catch (dbError) { - logger.workerError( - 'job_failure_update_error', - new DatabaseError( - 'updateJobStatus', - dbError instanceof Error ? 
dbError : new Error(String(dbError)), - ), - { jobId }, - ); + public async shutdown(): Promise { + logger.info('worker', `Shutting down ${this.workerType} pool`, { + workerType: this.workerType, + activeJobs: this.activeJobs.size, + queuedJobs: this.jobQueue.length, + totalWorkers: this.workers.length, + }); + + this.jobQueue.forEach((item) => item.reject(new Error('Shutting down'))); + this.activeJobs.forEach((item) => item.reject(new Error('Shutting down'))); // Reject active jobs too + this.workerDeathTimers.forEach((timer) => clearTimeout(timer)); + this.workers.forEach((worker) => worker.terminate()); + + this.jobQueue.length = 0; + this.activeJobs.clear(); + this.workerDeathTimers.clear(); + this.workers.length = 0; + this.availableWorkers.clear(); + this.busyWorkers.clear(); + } +} + +/** + * WorkerManager - High-level interface for managing worker pools + */ +class WorkerManager { + private embeddingPool: WorkerPool; + private matchingPool: WorkerPool; + + constructor() { + logger.info('worker', 'Initializing WorkerManager', { + embeddingWorkers, + matchingWorkers, + }); + + this.embeddingPool = new WorkerPool('embedder', embeddingWorkers); + this.matchingPool = new WorkerPool('matcher', matchingWorkers); + } + + public async enqueue(job: EmbeddingJob | MatchingJob, workerType: workerTypes): Promise { + logger.info('worker', `Enqueuing job to ${workerType} pool`, { + workerType, + jobId: job.jobId, + }); + + if (workerType === 'embedder') { + return this.embeddingPool.executeJob(job); + } else if (workerType === 'matcher') { + return this.matchingPool.executeJob(job); + } else { + const error = new Error(`Unknown worker type: ${workerType}`); + logger.error('worker', 'Unknown worker type requested', error, { + workerType, + jobId: job.jobId, + }); + throw error; } } + + public async shutdown(): Promise { + await Promise.all([this.embeddingPool.shutdown(), this.matchingPool.shutdown()]); + } } // Export singleton instance From 
ea962b63fe4ad5a193a8846742f355b4ea0e6c90 Mon Sep 17 00:00:00 2001 From: Maxi <76120220+MaxiLein@users.noreply.github.com> Date: Wed, 22 Oct 2025 15:10:54 +0200 Subject: [PATCH 29/48] fix: remove unused cross-encoder from startup check --- src/competence-matcher/src/utils/huggingface.ts | 6 +++--- src/competence-matcher/src/worker/matcher.ts | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/competence-matcher/src/utils/huggingface.ts b/src/competence-matcher/src/utils/huggingface.ts index cd43026e1..0898b8609 100644 --- a/src/competence-matcher/src/utils/huggingface.ts +++ b/src/competence-matcher/src/utils/huggingface.ts @@ -24,15 +24,15 @@ export async function ensureAllHuggingfaceModelsAreAvailable() { logger.debug('model', 'Initialising zero-shot semantic opposites model...'); await ZeroShotSemanticOpposites.getInstance(); - logger.debug('model', 'Initialising cross-encoder model...'); - await CrossEncoder.getInstance(); + // logger.debug('model', 'Initialising cross-encoder model...'); + // await CrossEncoder.getInstance(); logger.modelInfo('All HuggingFace models initialised successfully'); // Delete instances to free up memory as they will be reloaded in worker threads Embedding.deleteInstance(); ZeroShotSemanticOpposites.deleteInstance(); - CrossEncoder.deleteInstance(); + // CrossEncoder.deleteInstance(); } catch (error) { throw new HuggingFaceModelError( 'unknown', // We don't know which specific model failed - will maybe add later diff --git a/src/competence-matcher/src/worker/matcher.ts b/src/competence-matcher/src/worker/matcher.ts index da29def32..ec1e29423 100644 --- a/src/competence-matcher/src/worker/matcher.ts +++ b/src/competence-matcher/src/worker/matcher.ts @@ -4,7 +4,7 @@ import { withJobUpdates, workerLogger, startHeartbeat } from '../utils/worker'; import { addReason } from '../tasks/reason'; import { Match, MatchingJob } from '../utils/types'; import ZeroShot, { labels } from '../tasks/semantic-zeroshot'; -import 
CrossEncoder from '../tasks/cross-encode'; +// import CrossEncoder from '../tasks/cross-encode'; import { config } from '../config'; /** From c5220d7bd8799ab82f94c25d0084ebd60c2474ff Mon Sep 17 00:00:00 2001 From: Maxi <76120220+MaxiLein@users.noreply.github.com> Date: Wed, 22 Oct 2025 15:21:29 +0200 Subject: [PATCH 30/48] add production scripts for detached mode and stopping the server --- src/competence-matcher/package.json | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/competence-matcher/package.json b/src/competence-matcher/package.json index e0072315e..08c4ad7fc 100644 --- a/src/competence-matcher/package.json +++ b/src/competence-matcher/package.json @@ -6,7 +6,9 @@ "scripts": { "dev": "ts-node-dev --respawn --transpile-only --watch .env src/server.ts", "build": "tsc", - "production": "node dist/server.js" + "production": "node dist/server.js", + "production:detached": "nohup node dist/server.js >/dev/null 2>&1 & echo $! > ./server.pid", + "production:stop": "if [ -f ./server.pid ]; then kill $(cat ./server.pid) && rm -f ./server.pid; else echo 'no pid file'; fi" }, "repository": { "type": "git", From e5e95f1305b29eed773b7eb69a0b0254a1c538b1 Mon Sep 17 00:00:00 2001 From: Maxi <76120220+MaxiLein@users.noreply.github.com> Date: Wed, 22 Oct 2025 15:35:32 +0200 Subject: [PATCH 31/48] fix: enhance production scripts for better server management --- src/competence-matcher/package.json | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/competence-matcher/package.json b/src/competence-matcher/package.json index 08c4ad7fc..68f8d79ea 100644 --- a/src/competence-matcher/package.json +++ b/src/competence-matcher/package.json @@ -6,9 +6,9 @@ "scripts": { "dev": "ts-node-dev --respawn --transpile-only --watch .env src/server.ts", "build": "tsc", - "production": "node dist/server.js", - "production:detached": "nohup node dist/server.js >/dev/null 2>&1 & echo $! 
> ./server.pid", - "production:stop": "if [ -f ./server.pid ]; then kill $(cat ./server.pid) && rm -f ./server.pid; else echo 'no pid file'; fi" + "production:start": "if [ -f ./server.pid ]; then PID=$(cat ./server.pid); if ps -p $PID > /dev/null 2>&1 && ps -p $PID -o args= | grep -q node; then echo \"Server already running (pid $PID)\"; exit 1; fi; fi; node dist/server.js", + "production:detached": "if [ -f ./server.pid ]; then PID=$(cat ./server.pid); if ps -p $PID > /dev/null 2>&1 && ps -p $PID -o args= | grep -q node; then echo \"Server already running (pid $PID)\"; exit 1; fi; fi; nohup node dist/server.js >/dev/null 2>&1 & echo $! > ./server.pid", + "production:stop": "if [ -f ./server.pid ]; then PID=$(cat ./server.pid); if ps -p $PID > /dev/null 2>&1; then kill $PID >/dev/null 2>&1 || true; sleep 1; fi; rm -f ./server.pid; else echo \"No server.pid file found\"; fi" }, "repository": { "type": "git", From c8a79ee1eeb7139e2da26d342f81aee541693865 Mon Sep 17 00:00:00 2001 From: Maxi <76120220+MaxiLein@users.noreply.github.com> Date: Wed, 22 Oct 2025 15:36:26 +0200 Subject: [PATCH 32/48] fix: update .gitignore to include additional ignored files and directories --- .gitignore | 70 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 70 insertions(+) diff --git a/.gitignore b/.gitignore index 731f93588..6ca80c0f4 100644 --- a/.gitignore +++ b/.gitignore @@ -67,3 +67,73 @@ src/competence-matcher/src/db/dbs src/competence-matcher/src/models src/competence-matcher/.env src/competence-matcher/logs +node_modules/ +.yarn-cache/ +/*.zip +coverage/ + +# PROCEED files +src/engine/native/node/**/data_files +src/engine/native/node/native-config/src/config.json + +src/engine/native/web/server/public/* +src/engine/native/web/server/node_modules + +src/engine/e2e_tests/testEngine/config.json + +# OS files +.DS_Store +.directory + +# Build dirs +jsdoc/output_html/ +!build/index.html +build/* + +# local env files +.env.local +.env.*.local + +# Log files 
+npm-debug.log* +yarn-debug.log* +yarn-error.log* + +# yarn is used as package manager, so npm files +package-lock.json + +# Editor directories and files +.idea +*.suo +*.ntvs* +*.njsproj +*.sln +*.sw? + +# git modules +.gitmodules + +# env configs +./src/management-system/src/backend/server/environment-configurations/ +./src/backend/server/environment-configurations/ + +# Playwright +/test-results/ +/playwright-report/ +/playwright/.cache/ +dataEval.json +/blob-report/ + +# Ignore generated credentials from google-github-actions/auth +gha-creds-*.json + +# Ignore custom claude project files +CLAUDE.md + +# Matching models & DB +src/competence-matcher/dist +src/competence-matcher/src/db/dbs +src/competence-matcher/src/models +src/competence-matcher/.env +src/competence-matcher/logs +src/competence-matcher/server.pid From eed56cff3ddcbbf786146af214e82e6d0193ba4a Mon Sep 17 00:00:00 2001 From: Maxi <76120220+MaxiLein@users.noreply.github.com> Date: Sat, 1 Nov 2025 15:33:03 +0100 Subject: [PATCH 33/48] refactor: update production scripts and enhance matching algorithm parameters --- src/competence-matcher/package.json | 4 +- src/competence-matcher/src/config.ts | 10 +- src/competence-matcher/src/utils/prompts.ts | 175 ++++--------------- src/competence-matcher/src/worker/matcher.ts | 18 +- 4 files changed, 55 insertions(+), 152 deletions(-) diff --git a/src/competence-matcher/package.json b/src/competence-matcher/package.json index 68f8d79ea..7aa530aa7 100644 --- a/src/competence-matcher/package.json +++ b/src/competence-matcher/package.json @@ -6,7 +6,7 @@ "scripts": { "dev": "ts-node-dev --respawn --transpile-only --watch .env src/server.ts", "build": "tsc", - "production:start": "if [ -f ./server.pid ]; then PID=$(cat ./server.pid); if ps -p $PID > /dev/null 2>&1 && ps -p $PID -o args= | grep -q node; then echo \"Server already running (pid $PID)\"; exit 1; fi; fi; node dist/server.js", + "production": "if [ -f ./server.pid ]; then PID=$(cat ./server.pid); if ps -p 
$PID > /dev/null 2>&1 && ps -p $PID -o args= | grep -q node; then echo \"Server already running (pid $PID)\"; exit 1; fi; fi; node dist/server.js", "production:detached": "if [ -f ./server.pid ]; then PID=$(cat ./server.pid); if ps -p $PID > /dev/null 2>&1 && ps -p $PID -o args= | grep -q node; then echo \"Server already running (pid $PID)\"; exit 1; fi; fi; nohup node dist/server.js >/dev/null 2>&1 & echo $! > ./server.pid", "production:stop": "if [ -f ./server.pid ]; then PID=$(cat ./server.pid); if ps -p $PID > /dev/null 2>&1; then kill $PID >/dev/null 2>&1 || true; sleep 1; fi; rm -f ./server.pid; else echo \"No server.pid file found\"; fi" }, @@ -41,4 +41,4 @@ "engines": { "node": ">=23.5.0" } -} \ No newline at end of file +} diff --git a/src/competence-matcher/src/config.ts b/src/competence-matcher/src/config.ts index 1864b3ff3..5a2ae706a 100644 --- a/src/competence-matcher/src/config.ts +++ b/src/competence-matcher/src/config.ts @@ -1,8 +1,6 @@ import dotenv from 'dotenv'; dotenv.config({ path: '.env' }); -import * as os from 'node:os'; - export const config = { dbPath: process.env.DB_PATH || 'src/db/dbs/', embeddingModel: process.env.EMBEDDING_MODEL || 'onnx-community/Qwen3-Embedding-0.6B-ONNX', @@ -44,4 +42,12 @@ export const config = { maxOllamaRetries: parseInt(process.env.MAX_OLLAMA_RETRIES || '5', 10), // Maximum model pull retry attempts ollamaRetryDelay: parseInt(process.env.OLLAMA_RETRY_DELAY || '30', 10) * 1_000, // Base delay between retries in seconds (converted to ms) ollamaRetryBackoff: parseFloat(process.env.OLLAMA_RETRY_BACKOFF || '1.5'), // Exponential backoff multiplier + // Matching algorithm scaling parameters + matchDistanceOffset: parseFloat(process.env.MATCH_DISTANCE_OFFSET || '0.45'), // Offset subtracted from match distance before scaling + matchDistanceMultiplier: parseFloat(process.env.MATCH_DISTANCE_MULTIPLIER || '2'), // Multiplier for distance after offset + contradictionThreshold: parseFloat(process.env.CONTRADICTION_THRESHOLD 
|| '0.3'), // Threshold for contradiction detection + entailmentThreshold: parseFloat(process.env.ENTAILMENT_THRESHOLD || '0.55'), // Threshold for entailment detection + alignmentDistanceThreshold: parseFloat(process.env.ALIGNMENT_DISTANCE_THRESHOLD || '0.65'), // Minimum distance required for alignment boost + alignmentBoostMultiplier: parseFloat(process.env.ALIGNMENT_BOOST_MULTIPLIER || '1.2'), // Multiplier to boost distance for aligning matches + neutralReductionMultiplier: parseFloat(process.env.NEUTRAL_REDUCTION_MULTIPLIER || '0.65'), // Multiplier to reduce distance for neutral matches }; diff --git a/src/competence-matcher/src/utils/prompts.ts b/src/competence-matcher/src/utils/prompts.ts index eab19af6c..8ccb1e280 100644 --- a/src/competence-matcher/src/utils/prompts.ts +++ b/src/competence-matcher/src/utils/prompts.ts @@ -167,27 +167,7 @@ export const SEMANTIC_SPLITTER: Message[] = [ // }, // ]; -const MATCH_REASON_INSTRUCT_OLD: Message = { - role: 'system', - content: ` - You are an expert in generating reasons for matching scores and their alignment between tasks and competences. - Your task is to generate a reason for the matching score between a task and a competence. - In addition to the score - which is the normalized similarity score between the task and competence - you also receive an alignment label which can be one of 'aligning', 'neutral' or 'contradicting'. - The alignment label indicates whether the task and competence are well aligned ('aligning'), not really related, so do not match well nor badly ('neutral') or are in conflict with each other ('contradicting'). - Generally speaking, a score of 0 means not suited, where not suited can either mean, not suited at all or just not really suited (i.e. the capability and task either contradict or are not overlapping in terms of competences, e.g. they are unrelated). - A score of 1 means perfectly suited (i.e. the capability fully covers the task). 
- Hence, everything larger than 0 already indicates some degree of suitability. - A match score of e.g. 0.15 is already slightly suited, 0.5 indicates that the resource is somewhat well suited to perform the task, 0.7 would indicate that the resource is quite well suited to perform the task, and everything above 0.85 and 1.0 means close to perfectly suited. - The reason should be one to three short, concise sentence that explain why the task and competence match as well as they did or why they did not match that well. - Do not mention the similarity score or alignment label in your response. - The reason should be based on the text of the task and the competence and their estimated normalized similarity score and alignment. - The similarity score is a number between 0 and 1, where 0 means no similarity and 1 means perfect similarity. - Do not mention the similarity score in your response. - Do not mention the alignment label in your response. - `, -}; - -const MATCH_REASON_EXAMPLES: Message[] = [ +let MATCH_REASON_EXAMPLES: Message[] = [ { role: 'user', content: ` @@ -203,7 +183,6 @@ const MATCH_REASON_EXAMPLES: Message[] = [ The statements match very well because the task requires operating CNC milling machines, which is exactly what the competence is about. `, }, - { role: 'user', content: ` @@ -224,8 +203,8 @@ const MATCH_REASON_EXAMPLES: Message[] = [ content: ` Task: Assemble circuit boards according to schematic diagrams. Competence: Basic knowledge of electronics and soldering skills. - Similarity Score: 0.08 - Alignment: aligning + Similarity Score: 0.18 + Alignment: neutral `, }, { @@ -265,13 +244,12 @@ const MATCH_REASON_EXAMPLES: Message[] = [ The competence directly covers leading Scrum teams and facilitating ceremonies, which matches the task's requirements precisely. `, }, - { role: 'user', content: ` Task: Develop native iOS applications in Swift and ship to the App Store. Competence: Experienced Android engineer, Kotlin and Jetpack Compose. 
- Similarity Score: 0.25 + Similarity Score: 0.40 Alignment: neutral `, }, @@ -281,7 +259,6 @@ const MATCH_REASON_EXAMPLES: Message[] = [ The competence demonstrates mobile-app development experience but on a different platform and language, so it provides some transferable skills but not a direct match. `, }, - { role: 'user', content: ` @@ -297,7 +274,6 @@ const MATCH_REASON_EXAMPLES: Message[] = [ The competence combines native German proficiency with a legal translation credential, matching the task's need for accurate legal terminology. `, }, - { role: 'user', content: ` @@ -313,7 +289,6 @@ const MATCH_REASON_EXAMPLES: Message[] = [ The competence includes both the certification and hands-on payroll experience required to perform payroll calculation and filings. `, }, - { role: 'user', content: ` @@ -329,7 +304,6 @@ const MATCH_REASON_EXAMPLES: Message[] = [ Both roles involve operating heavy equipment, but the specific machinery and skills differ, so the relevance is limited. `, }, - { role: 'user', content: ` @@ -345,7 +319,6 @@ const MATCH_REASON_EXAMPLES: Message[] = [ The competence shows solid testing experience but in a different language and framework, making it partly relevant but not a perfect fit. `, }, - { role: 'user', content: ` @@ -361,7 +334,6 @@ const MATCH_REASON_EXAMPLES: Message[] = [ The competence demonstrates customer-facing communication and support experience that transfers well to voice support, though the channel differs. `, }, - { role: 'user', content: ` @@ -377,7 +349,6 @@ const MATCH_REASON_EXAMPLES: Message[] = [ The competence explicitly covers phlebotomy certification and clinical practice, which directly matches the task's duties. `, }, - { role: 'user', content: ` @@ -393,7 +364,6 @@ const MATCH_REASON_EXAMPLES: Message[] = [ The competence shows the required safety certification and audit experience needed to perform hazard assessments and documentation. 
`, }, - { role: 'user', content: ` @@ -409,7 +379,6 @@ const MATCH_REASON_EXAMPLES: Message[] = [ The competence provides valuable finance and data skills, but lacks the specific low-latency C++ systems expertise required for the task. `, }, - { role: 'user', content: ` @@ -425,7 +394,6 @@ const MATCH_REASON_EXAMPLES: Message[] = [ The competence combines visual branding experience and leadership, which fits the task of leading designers to create brand assets. `, }, - { role: 'user', content: ` @@ -441,7 +409,6 @@ const MATCH_REASON_EXAMPLES: Message[] = [ The task requires specialized medical knowledge and terminology that the competence does not provide. `, }, - { role: 'user', content: ` @@ -473,23 +440,21 @@ const MATCH_REASON_EXAMPLES: Message[] = [ The competence explicitly covers Nginx configuration, TLS certificate handling, and reverse-proxy setups, which directly match the task. That operational experience maps to the task's technical requirements. Overall this is a strong, practical fit. `, }, - { role: 'user', content: ` Task: Write automated end-to-end tests for a React web application using Playwright. Competence: Experienced in unit/integration testing in React and in end-to-end tests using Selenium. - Similarity Score: 0.46 - Alignment: neutral + Similarity Score: 0.63 + Alignment: aligning `, }, { role: 'assistant', content: ` - The competence shows solid testing experience and familiarity with React, which transfers to end-to-end testing concepts. However, the specific test framework differs and Selenium's patterns are not identical to Playwright's modern APIs. The fit is moderately useful but incomplete without Playwright-specific experience. + The competence shows solid testing experience and familiarity with React, which transfers to end-to-end testing concepts. While the specific test framework differs and Selenium's patterns are not identical to Playwright's modern APIs, the fit is definitely relevant although not a perfect match. 
`, }, - { role: 'user', content: ` @@ -535,52 +500,35 @@ const MATCH_REASON_EXAMPLES: Message[] = [ The competence demonstrates direct experience implementing data-protection policies and performing impact assessments, which correspond to the task's key responsibilities. Those policy and assessment skills indicate strong familiarity with GDPR-related workflows and controls. The competence is therefore a very good fit for the task. `, }, - { role: 'user', content: ` Task: Drive a passenger shuttle daily and secure vehicle inspections. Competence: Medical restriction explicitly prohibiting driving. - Similarity Score: 0.60 + Similarity Score: 0.00 Alignment: contradicting `, }, { role: 'assistant', content: ` - Although the numeric score suggests some overlap, the competence text explicitly prohibits driving while the task requires operating a vehicle. This direct conflict with the task's core requirement prevents the candidate from performing the role. The competence therefore does not meet the task. + The task requires driving a passenger shuttle, but the competence indicates a medical restriction against driving, creating a direct conflict. `, }, ///////////////// - { - role: 'user', - content: ` - Task: Assemble circuit boards according to schematic diagrams. - Competence: Basic knowledge of electronics and soldering skills. - Similarity Score: 0.08 - Alignment: aligning - `, - }, - { - role: 'assistant', - content: ` - The statements only slightly match because assembling circuit boards requires more specific hands-on PCB assembly experience than basic electronics knowledge. Basic soldering helps, but the competence doesn't explicitly show the detailed assembly practice the task expects. The overlap is therefore minimal. - `, - }, - { role: 'user', content: ` Task: Translate engineering user manuals from Chinese to English, preserving technical terms and measurements. 
Competence: Professional Chinese-to-English translator with experience in marketing and legal documents, no engineering background. - Similarity Score: 0.12 - Alignment: neutral + Similarity Score: 0.79 + Alignment: aligning `, }, { role: 'assistant', content: ` - The competence provides strong language and translation skills but lacks the technical engineering background needed to preserve specialized terminology and measurements. That domain gap limits suitability for technical manual translation without subject-matter support. The fit is therefore only slight. + The competence demonstrates strong translation skills between Chinese and English, which are essential for the task. However, the lack of engineering-specific experience means some technical terminology may require additional research. Overall, the candidate is quite well suited but may need to upskill on engineering concepts. `, }, { @@ -588,14 +536,14 @@ const MATCH_REASON_EXAMPLES: Message[] = [ content: ` Task: Administer Microsoft SQL Server databases, tune queries and manage backups. Competence: Extensive experience with MySQL and PostgreSQL administration. - Similarity Score: 0.40 + Similarity Score: 0.45 Alignment: neutral `, }, { role: 'assistant', content: ` - The competence indicates strong database administration skills and general principles that transfer across systems, but it lacks direct experience with Microsoft SQL Server-specific tools and features. That gap will require adaptation to server-specific administration and tuning. The candidate is moderately relevant but not an immediate fit. + The competence shows strong database administration skills that transfer conceptually to SQL Server. However, the specific platform differences mean the candidate will need to learn SQL Server's unique features and tools. The fit is partial: good foundational skills but not yet complete for SQL Server-specific tasks. 
`, }, { @@ -613,7 +561,6 @@ const MATCH_REASON_EXAMPLES: Message[] = [ The competence brings directly relevant front-end and React experience, including bundle optimization and component improvements. However, the candidate's limited exposure to advanced profiling workflows means they may need to upskill for deep-render hotspot analysis. Overall, they are reasonably well placed to improve performance but may require some additional profiling experience. `, }, - { role: 'user', content: ` @@ -629,7 +576,6 @@ const MATCH_REASON_EXAMPLES: Message[] = [ The competence demonstrates solid SQL and ETL experience that transfers to analytics work, and understanding of query design is directly useful for Redshift. The lack of Redshift-specific tuning and cluster knowledge is a remaining gap that will affect large-scale optimizations. The candidate is generally well suited but will need platform-specific tuning skills. `, }, - { role: 'user', content: ` @@ -645,7 +591,6 @@ const MATCH_REASON_EXAMPLES: Message[] = [ The competence shows relevant onboarding and customer engagement experience that transfers to SaaS adoption activities. However, the difference between on-premise manual onboarding and recurring SaaS flows means playbook-driven, product-led onboarding skills may be missing. The candidate can perform many onboarding tasks but will need to adapt to SaaS-specific tooling and processes. `, }, - { role: 'user', content: ` @@ -658,10 +603,9 @@ const MATCH_REASON_EXAMPLES: Message[] = [ { role: 'assistant', content: ` - The competence is strong on model development and evaluation, which covers a critical portion of the workflow. However, the task requires production deployment skills—CI/CD pipelines, containerization, and monitoring—that are not evident in the competence. The fit is partial: good for the modeling side but not yet complete for end-to-end deployment. 
+ The competence demonstrates strong machine-learning model development skills, which are foundational for production deployment. However, the lack of DevOps and infrastructure experience means the candidate may struggle with CI/CD pipelines, containerization, and monitoring aspects. The fit is partial: good ML skills but significant gaps in deployment expertise. `, }, - { role: 'user', content: ` @@ -679,6 +623,23 @@ const MATCH_REASON_EXAMPLES: Message[] = [ }, ]; +// Add intermediate system instructions for more consistent reasoning +const systemIntermezzo: Message = { + role: 'system', + content: ` + Remember to base your reason on both the similarity score and the alignment label without mentioning them explicitly. + Scores above 0.6 generally indicate a good up to perfect suitability. + Scores between 0.4 and 0.6 generally indicate some up to a good degree of suitability. + Scores between 0.2 and 0.4 generally indicate some degree of suitability. + Scores below or equal to 0.2 generally indicate little to no suitability. + `, +}; + +// Add after every assistant message +MATCH_REASON_EXAMPLES = MATCH_REASON_EXAMPLES.flatMap((msg) => + msg.role === 'assistant' ? [msg, systemIntermezzo] : [msg], +); + const MATCH_REASON_INSTRUCT: Message = { role: 'system', content: ` @@ -712,78 +673,6 @@ Output constraints: `, }; -// export const MATCH_REASON: Message[] = [MATCH_REASON_INTRUCT, ...MATCH_REASON_EXAMPLES]; - -const MATCH_REASON_INTSRUCT_2: Message = { - role: 'system', - content: ` - You are an expert in generating matching scores based on reason between tasks and competences. - Your task is to generate a score, how well the resource with the respective capability is suited to fulfill the given task. - The score should be a number (floating point) between 0 and 1, where 0 means either not suited (i.e. the capability and task either contradict or are not overlapping in terms of competences, e.g. they are unrelated) and 1 means perfectly suited (i.e. 
the capability fully covers the task). - So values larger than 0 already indicate some degree of suitability, while values close to 1 indicate a very good match. - Something that is neither well suited nor unsuited should still be rated with 0. - 0.25 would indicate that the resource is only slightly suited to perform the task. - So 0.5 is not a neutral value, but rather indicates that the resource is somewhat well suited to perform the task, but not very well suited. - 0.75 would indicate that the resource is quite well suited to perform the task, but not perfectly suited. - The reason should be one to three short, concise sentence that explain why the task and competence match as well as they did or why they did not match that well. - The reason should be based on the text of the task and the competence and their estimated normalized similarity score and alignment. - The similarity score is a number between 0 and 1. - Your response should be in the following format: - - ${splittingSymbol} - - `, -}; - -const MATCH_REASON_EXAMPLES_2: Message[] = [ - { - role: 'user', - content: ` - Task: Operate CNC milling machines to produce precision metal parts. - Competence: Experience with CNC milling machines and precision machining. - `, - }, - { - role: 'assistant', - content: ` - 0.95 - ${splittingSymbol} - The statements match very well because the task requires operating CNC milling machines, which is exactly what the competence is about. - `, - }, - - { - role: 'user', - content: ` - Task: Delivering packages to customers on time. Driving a delivery van safely through city traffic. Loading and unloading packages efficiently. Communicating with customers professionally. Planning optimal delivery routes using GPS technology. - Competence: Has no drivers license and cannot operate vehicles. - `, - }, - { - role: 'assistant', - content: ` - 0.0 - ${splittingSymbol} - The statements do not match. 
The task requires driving a delivery van, but the competence indicates that the person cannot operate vehicles at all. - `, - }, - { - role: 'user', - content: ` - Task: Prepare raw materials for production. - Competence: Experience with inventory management and supply chain logistics. - `, - }, - { - role: 'assistant', - content: ` - 0.30 - ${splittingSymbol} - The statements have a relativly low match because preparing raw materials is a basic task that does not require advanced inventory management or supply chain logistics skills. - `, - }, -]; - export const MATCH_REASON: Message[] = [MATCH_REASON_INSTRUCT, ...MATCH_REASON_EXAMPLES]; /** diff --git a/src/competence-matcher/src/worker/matcher.ts b/src/competence-matcher/src/worker/matcher.ts index ec1e29423..e7d62ddfc 100644 --- a/src/competence-matcher/src/worker/matcher.ts +++ b/src/competence-matcher/src/worker/matcher.ts @@ -108,7 +108,11 @@ parentPort.on('message', async (message: any) => { let flag = 'neutral'; // Default flag // Balance distance - let newDistance = Math.min(1, Math.max(0, match.distance - 0.45) * 2); + let newDistance = Math.min( + 1, + Math.max(0, match.distance - config.matchDistanceOffset) * + config.matchDistanceMultiplier, + ); // Get Alignment via Zero-Shot const sentiment = await ZeroShot.nliBiDirectional(description, match.text); @@ -124,20 +128,24 @@ parentPort.on('message', async (message: any) => { // First: Contradicting? if ( sentiment.ranking[0] == 'contradict' || - sentiment.contradict > 0.3 || + sentiment.contradict > config.contradictionThreshold || contradiction.contradicting ) { flag = 'contradicting'; newDistance = 0.0; // Second: Aligning? 
- } else if (sentiment.entail > 0.55 && match.distance > 0.65 && alignment.aligning) { + } else if ( + sentiment.entail > config.entailmentThreshold && + match.distance > config.alignmentDistanceThreshold && + alignment.aligning + ) { flag = 'aligning'; // Boost similarity-based distance - newDistance = Math.min(1, newDistance * 1.5); + newDistance = Math.min(1, newDistance * config.alignmentBoostMultiplier); } else { flag = 'neutral'; // Reduce distance for neutral - newDistance *= 0.65; + newDistance *= config.neutralReductionMultiplier; } // Store match result for reasoning workaround From c218461caa09480230c181587f0e9ed67f12a514 Mon Sep 17 00:00:00 2001 From: Maxi <76120220+MaxiLein@users.noreply.github.com> Date: Sat, 1 Nov 2025 17:20:01 +0100 Subject: [PATCH 34/48] Cleanup tensors and init modle changes --- src/competence-matcher/src/config.ts | 2 +- src/competence-matcher/src/tasks/embedding.ts | 11 ++++ src/competence-matcher/src/worker/embedder.ts | 62 ++++++++++++------- src/competence-matcher/src/worker/matcher.ts | 59 +++++++++++------- .../src/worker/worker-manager.ts | 47 ++++++++++++-- 5 files changed, 133 insertions(+), 48 deletions(-) diff --git a/src/competence-matcher/src/config.ts b/src/competence-matcher/src/config.ts index 5a2ae706a..b86cb38c4 100644 --- a/src/competence-matcher/src/config.ts +++ b/src/competence-matcher/src/config.ts @@ -22,7 +22,7 @@ export const config = { embeddingWorkers: parseInt(process.env.EMBEDDING_WORKERS || '1', 10), // Number of embedding workers to keep alive matchingWorkers: parseInt(process.env.MATCHING_WORKERS || '1', 10), // Number of matching workers to keep alive workerHeartbeatInterval: parseInt(process.env.WORKER_HEARTBEAT_INTERVAL || '30', 10) * 1_000, // Worker heartbeat interval in seconds (converted to ms) - how often workers send heartbeats - workerDeathTimeout: parseInt(process.env.WORKER_DEATH_TIMEOUT || '45', 10) * 1_000, // Worker death timeout in seconds (converted to ms) - how long to wait 
before considering worker dead + workerDeathTimeout: parseInt(process.env.WORKER_DEATH_TIMEOUT || '120', 10) * 1_000, // Worker death timeout in seconds (converted to ms) - how long to wait before considering worker dead (increased from 45s to 120s to accommodate model loading and job execution) maxJobTime: parseInt(process.env.MAX_JOB_TIME || '600', 10) * 1_000, // converted from seconds to milliseconds logLevel: process.env.LOG_LEVEL || 'INFO', // Levels: 'DEBUG', 'INFO', 'WARN', 'ERROR' logTypes: process.env.LOG_TYPES || 'server,request,worker,database,model,system', diff --git a/src/competence-matcher/src/tasks/embedding.ts b/src/competence-matcher/src/tasks/embedding.ts index 4148e6980..9ab5d42ff 100644 --- a/src/competence-matcher/src/tasks/embedding.ts +++ b/src/competence-matcher/src/tasks/embedding.ts @@ -41,6 +41,17 @@ export default class Embedding extends TransformerPipeline { // Handle job messages @@ -38,27 +76,7 @@ parentPort.on('message', async (message: any) => { taskCount: job.tasks?.length || 0, }); - // ensure models are initialised (but do not run this for health_check) - try { - await ensureModelsInitialised(); - } catch (err) { - workerLogger( - job.jobId || 'system', - 'debug', - 'Embedder worker failed to initialize models', - { threadId, jobId: job.jobId }, - err instanceof Error ? err : new Error(String(err)), - ); - // Notify parent and exit or mark job failed - parentPort!.postMessage({ - type: 'error', - jobId: job.jobId, - error: `Model initialisation failed: ${err instanceof Error ? 
err.message : String(err)}`, - }); - // still send job_completed so worker pool can continue - parentPort!.postMessage({ type: 'job_completed', jobId: job.jobId }); - return; - } + // Models are already initialized on worker startup, so we skip ensureModelsInitialised here workerLogger(job.jobId, 'debug', `Starting embedding job with ${job.tasks.length} tasks`, { threadId, diff --git a/src/competence-matcher/src/worker/matcher.ts b/src/competence-matcher/src/worker/matcher.ts index e7d62ddfc..0759eccda 100644 --- a/src/competence-matcher/src/worker/matcher.ts +++ b/src/competence-matcher/src/worker/matcher.ts @@ -21,7 +21,7 @@ async function ensureModelsInitialised() { await Embedding.getInstance(); await ZeroShot.getInstance(); modelsInitialised = true; - workerLogger('system', 'debug', 'Matcher worker online', { threadId }); + workerLogger('system', 'info', 'Matcher worker models loaded', { threadId }); } catch (err) { // Bubble up so job handling can report the error throw err; @@ -31,6 +31,42 @@ async function ensureModelsInitialised() { // Start heartbeat immediately startHeartbeat('matcher', config.workerHeartbeatInterval); +// Pre-load models on worker startup to avoid timeout on first job +(async function initializeWorker() { + try { + workerLogger('system', 'info', 'Matcher worker initializing - loading models...', { threadId }); + await ensureModelsInitialised(); + + // Signal to main thread that worker is ready + parentPort!.postMessage({ + type: 'ready', + threadId, + workerType: 'matcher', + }); + + workerLogger('system', 'info', 'Matcher worker fully initialized and ready for jobs', { + threadId, + }); + } catch (error) { + workerLogger( + 'system', + 'error', + 'Matcher worker failed to initialize', + { threadId }, + error instanceof Error ? error : new Error(String(error)), + ); + + parentPort!.postMessage({ + type: 'initialization_failed', + threadId, + workerType: 'matcher', + error: error instanceof Error ? 
error.message : String(error), + }); + + process.exit(1); + } +})(); + // Set up job message handler parentPort.on('message', async (message: any) => { // Handle job messages @@ -43,26 +79,7 @@ parentPort.on('message', async (message: any) => { listId: job.listId, }); - try { - await ensureModelsInitialised(); - } catch (err) { - workerLogger( - job.jobId || 'system', - 'debug', - 'Matcher worker failed to initialize models', - { threadId, jobId: job.jobId }, - err instanceof Error ? err : new Error(String(err)), - ); - // Notify parent and exit or mark job failed - parentPort!.postMessage({ - type: 'error', - jobId: job.jobId, - error: `Model initialisation failed: ${err instanceof Error ? err.message : String(err)}`, - }); - // still send job_completed so worker pool can continue - parentPort!.postMessage({ type: 'job_completed', jobId: job.jobId }); - return; - } + // Models are already initialized on worker startup, so we skip ensureModelsInitialised here workerLogger(job.jobId, 'debug', `Starting matching job with ${job.tasks.length} tasks`, { threadId, diff --git a/src/competence-matcher/src/worker/worker-manager.ts b/src/competence-matcher/src/worker/worker-manager.ts index b036e1696..8f98ed1f7 100644 --- a/src/competence-matcher/src/worker/worker-manager.ts +++ b/src/competence-matcher/src/worker/worker-manager.ts @@ -78,12 +78,16 @@ class WorkerPool { busyWorkers: this.busyWorkers.size, }); - // Set initial death timer + // Set initial death timer (longer timeout for initialization) const deathTimer = setTimeout(() => this.killWorker(worker), workerDeathTimeout); this.workerDeathTimers.set(worker, deathTimer); worker.on('message', (message: any) => { - if (message.type === 'heartbeat') { + if (message.type === 'ready') { + this.handleWorkerReady(worker); + } else if (message.type === 'initialization_failed') { + this.handleWorkerInitializationFailed(worker, message.error); + } else if (message.type === 'heartbeat') { this.handleHeartbeat(worker); } else if 
(message.type === 'log') { // Handle log messages from workers @@ -95,6 +99,40 @@ class WorkerPool { worker.once('exit', () => this.handleWorkerFailure(worker, new Error('Worker exited'))); } + private handleWorkerReady(worker: Worker): void { + logger.info('worker', `${this.workerType} worker ready for jobs`, { + workerType: this.workerType, + threadId: worker.threadId, + totalWorkers: this.workers.length, + poolSize: this.poolSize, + }); + + // Add to available pool now that initialization is complete + this.availableWorkers.add(worker); + + // Reset death timer to normal cadence + const existingTimer = this.workerDeathTimers.get(worker); + if (existingTimer) clearTimeout(existingTimer); + + const newTimer = setTimeout(() => this.killWorker(worker), workerDeathTimeout); + this.workerDeathTimers.set(worker, newTimer); + + // Try to process any queued jobs + this.processNextJob(); + } + + private handleWorkerInitializationFailed(worker: Worker, error: string): void { + logger.error('worker', `${this.workerType} worker initialization failed`, { + workerType: this.workerType, + threadId: worker.threadId, + error, + }); + + // Remove the failed worker and create a replacement + this.removeWorker(worker); + this.createWorker(); + } + private handleHeartbeat(worker: Worker): void { logger.debug('worker', `Heartbeat received from ${this.workerType} worker`, { workerType: this.workerType, @@ -109,8 +147,9 @@ class WorkerPool { const newTimer = setTimeout(() => this.killWorker(worker), workerDeathTimeout); this.workerDeathTimers.set(worker, newTimer); - // Mark available if not busy - if (!this.busyWorkers.has(worker)) { + // Only mark as available if not busy (worker should already be in pool from 'ready' signal) + // This handles the case where a worker becomes available again after completing a job + if (!this.busyWorkers.has(worker) && !this.availableWorkers.has(worker)) { this.availableWorkers.add(worker); this.processNextJob(); } From 
81d1c0882a6b9a9fa25ba53c548cd8c787577e72 Mon Sep 17 00:00:00 2001 From: Maxi <76120220+MaxiLein@users.noreply.github.com> Date: Sat, 1 Nov 2025 17:26:52 +0100 Subject: [PATCH 35/48] refactor: update suitability score thresholds and improve error logging in worker initialization --- src/competence-matcher/src/utils/prompts.ts | 6 +++--- src/competence-matcher/src/worker/worker-manager.ts | 3 +-- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/src/competence-matcher/src/utils/prompts.ts b/src/competence-matcher/src/utils/prompts.ts index 8ccb1e280..042997195 100644 --- a/src/competence-matcher/src/utils/prompts.ts +++ b/src/competence-matcher/src/utils/prompts.ts @@ -629,9 +629,9 @@ const systemIntermezzo: Message = { content: ` Remember to base your reason on both the similarity score and the alignment label without mentioning them explicitly. Scores above 0.6 generally indicate a good up to perfect suitability. - Scores between 0.4 and 0.6 generally indicate some up to a good degree of suitability. - Scores between 0.2 and 0.4 generally indicate some degree of suitability. - Scores below or equal to 0.2 generally indicate little to no suitability. + Scores between 0.45 and 0.6 generally indicate some up to a good degree of suitability. + Scores between 0.25 and 0.45 generally indicate some degree of suitability. + Scores below or equal to 0.25 generally indicate little to no suitability. 
`, }; diff --git a/src/competence-matcher/src/worker/worker-manager.ts b/src/competence-matcher/src/worker/worker-manager.ts index 8f98ed1f7..b96257f5e 100644 --- a/src/competence-matcher/src/worker/worker-manager.ts +++ b/src/competence-matcher/src/worker/worker-manager.ts @@ -122,10 +122,9 @@ class WorkerPool { } private handleWorkerInitializationFailed(worker: Worker, error: string): void { - logger.error('worker', `${this.workerType} worker initialization failed`, { + logger.error('worker', `${this.workerType} worker initialization failed`, new Error(error), { workerType: this.workerType, threadId: worker.threadId, - error, }); // Remove the failed worker and create a replacement From 7c1d6f98d4114bebc12daef5b115f65f7ff40e13 Mon Sep 17 00:00:00 2001 From: Maxi <76120220+MaxiLein@users.noreply.github.com> Date: Sat, 1 Nov 2025 17:31:05 +0100 Subject: [PATCH 36/48] fix: resolve V8 API locking issues in worker threads by setting device to wasm --- src/competence-matcher/src/utils/model.ts | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/competence-matcher/src/utils/model.ts b/src/competence-matcher/src/utils/model.ts index b0320beb2..24699eaed 100644 --- a/src/competence-matcher/src/utils/model.ts +++ b/src/competence-matcher/src/utils/model.ts @@ -48,6 +48,12 @@ export abstract class TransformerPipeline { progress_callback: options?.progress_callback, }; + // onnxruntime-node (the default) has V8 API locking issues in worker threads + if (!isMainThread) { + // @ts-ignore - Setting backend preference for worker threads + opts.device = 'wasm'; + } + // actually load the pipeline this.instance = await pipeline(task as PipelineType, model, opts); From 946e350f0e6178e58b63cdf7036574358002953f Mon Sep 17 00:00:00 2001 From: Maxi <76120220+MaxiLein@users.noreply.github.com> Date: Sat, 1 Nov 2025 17:36:58 +0100 Subject: [PATCH 37/48] refactor: update Node.js engine compatibility and remove V8 API locking workaround in TransformerPipeline --- 
src/competence-matcher/package.json | 2 +- src/competence-matcher/src/utils/model.ts | 6 ------ 2 files changed, 1 insertion(+), 7 deletions(-) diff --git a/src/competence-matcher/package.json b/src/competence-matcher/package.json index 7aa530aa7..af71a8f03 100644 --- a/src/competence-matcher/package.json +++ b/src/competence-matcher/package.json @@ -39,6 +39,6 @@ "typescript": "^5.8.3" }, "engines": { - "node": ">=23.5.0" + "node": ">=20.0.0 <23.0.0" } } diff --git a/src/competence-matcher/src/utils/model.ts b/src/competence-matcher/src/utils/model.ts index 24699eaed..b0320beb2 100644 --- a/src/competence-matcher/src/utils/model.ts +++ b/src/competence-matcher/src/utils/model.ts @@ -48,12 +48,6 @@ export abstract class TransformerPipeline { progress_callback: options?.progress_callback, }; - // onnxruntime-node (the default) has V8 API locking issues in worker threads - if (!isMainThread) { - // @ts-ignore - Setting backend preference for worker threads - opts.device = 'wasm'; - } - // actually load the pipeline this.instance = await pipeline(task as PipelineType, model, opts); From a0494cbd1118e378752bfd38a93010fd7b21c40a Mon Sep 17 00:00:00 2001 From: Maxi <76120220+MaxiLein@users.noreply.github.com> Date: Wed, 5 Nov 2025 14:44:39 +0100 Subject: [PATCH 38/48] revert --- src/competence-matcher/package.json | 2 +- src/competence-matcher/src/config.ts | 2 +- src/competence-matcher/src/tasks/embedding.ts | 11 ---- src/competence-matcher/src/worker/embedder.ts | 62 +++++++------------ src/competence-matcher/src/worker/matcher.ts | 59 +++++++----------- .../src/worker/worker-manager.ts | 46 ++------------ 6 files changed, 49 insertions(+), 133 deletions(-) diff --git a/src/competence-matcher/package.json b/src/competence-matcher/package.json index af71a8f03..7aa530aa7 100644 --- a/src/competence-matcher/package.json +++ b/src/competence-matcher/package.json @@ -39,6 +39,6 @@ "typescript": "^5.8.3" }, "engines": { - "node": ">=20.0.0 <23.0.0" + "node": ">=23.5.0" } } 
diff --git a/src/competence-matcher/src/config.ts b/src/competence-matcher/src/config.ts index b86cb38c4..5a2ae706a 100644 --- a/src/competence-matcher/src/config.ts +++ b/src/competence-matcher/src/config.ts @@ -22,7 +22,7 @@ export const config = { embeddingWorkers: parseInt(process.env.EMBEDDING_WORKERS || '1', 10), // Number of embedding workers to keep alive matchingWorkers: parseInt(process.env.MATCHING_WORKERS || '1', 10), // Number of matching workers to keep alive workerHeartbeatInterval: parseInt(process.env.WORKER_HEARTBEAT_INTERVAL || '30', 10) * 1_000, // Worker heartbeat interval in seconds (converted to ms) - how often workers send heartbeats - workerDeathTimeout: parseInt(process.env.WORKER_DEATH_TIMEOUT || '120', 10) * 1_000, // Worker death timeout in seconds (converted to ms) - how long to wait before considering worker dead (increased from 45s to 120s to accommodate model loading and job execution) + workerDeathTimeout: parseInt(process.env.WORKER_DEATH_TIMEOUT || '45', 10) * 1_000, // Worker death timeout in seconds (converted to ms) - how long to wait before considering worker dead maxJobTime: parseInt(process.env.MAX_JOB_TIME || '600', 10) * 1_000, // converted from seconds to milliseconds logLevel: process.env.LOG_LEVEL || 'INFO', // Levels: 'DEBUG', 'INFO', 'WARN', 'ERROR' logTypes: process.env.LOG_TYPES || 'server,request,worker,database,model,system', diff --git a/src/competence-matcher/src/tasks/embedding.ts b/src/competence-matcher/src/tasks/embedding.ts index 9ab5d42ff..4148e6980 100644 --- a/src/competence-matcher/src/tasks/embedding.ts +++ b/src/competence-matcher/src/tasks/embedding.ts @@ -41,17 +41,6 @@ export default class Embedding extends TransformerPipeline { // Handle job messages @@ -76,7 +38,27 @@ parentPort.on('message', async (message: any) => { taskCount: job.tasks?.length || 0, }); - // Models are already initialized on worker startup, so we skip ensureModelsInitialised here + // ensure models are initialised (but do not 
run this for health_check) + try { + await ensureModelsInitialised(); + } catch (err) { + workerLogger( + job.jobId || 'system', + 'debug', + 'Embedder worker failed to initialize models', + { threadId, jobId: job.jobId }, + err instanceof Error ? err : new Error(String(err)), + ); + // Notify parent and exit or mark job failed + parentPort!.postMessage({ + type: 'error', + jobId: job.jobId, + error: `Model initialisation failed: ${err instanceof Error ? err.message : String(err)}`, + }); + // still send job_completed so worker pool can continue + parentPort!.postMessage({ type: 'job_completed', jobId: job.jobId }); + return; + } workerLogger(job.jobId, 'debug', `Starting embedding job with ${job.tasks.length} tasks`, { threadId, diff --git a/src/competence-matcher/src/worker/matcher.ts b/src/competence-matcher/src/worker/matcher.ts index 0759eccda..e7d62ddfc 100644 --- a/src/competence-matcher/src/worker/matcher.ts +++ b/src/competence-matcher/src/worker/matcher.ts @@ -21,7 +21,7 @@ async function ensureModelsInitialised() { await Embedding.getInstance(); await ZeroShot.getInstance(); modelsInitialised = true; - workerLogger('system', 'info', 'Matcher worker models loaded', { threadId }); + workerLogger('system', 'debug', 'Matcher worker online', { threadId }); } catch (err) { // Bubble up so job handling can report the error throw err; @@ -31,42 +31,6 @@ async function ensureModelsInitialised() { // Start heartbeat immediately startHeartbeat('matcher', config.workerHeartbeatInterval); -// Pre-load models on worker startup to avoid timeout on first job -(async function initializeWorker() { - try { - workerLogger('system', 'info', 'Matcher worker initializing - loading models...', { threadId }); - await ensureModelsInitialised(); - - // Signal to main thread that worker is ready - parentPort!.postMessage({ - type: 'ready', - threadId, - workerType: 'matcher', - }); - - workerLogger('system', 'info', 'Matcher worker fully initialized and ready for jobs', { - 
threadId, - }); - } catch (error) { - workerLogger( - 'system', - 'error', - 'Matcher worker failed to initialize', - { threadId }, - error instanceof Error ? error : new Error(String(error)), - ); - - parentPort!.postMessage({ - type: 'initialization_failed', - threadId, - workerType: 'matcher', - error: error instanceof Error ? error.message : String(error), - }); - - process.exit(1); - } -})(); - // Set up job message handler parentPort.on('message', async (message: any) => { // Handle job messages @@ -79,7 +43,26 @@ parentPort.on('message', async (message: any) => { listId: job.listId, }); - // Models are already initialized on worker startup, so we skip ensureModelsInitialised here + try { + await ensureModelsInitialised(); + } catch (err) { + workerLogger( + job.jobId || 'system', + 'debug', + 'Matcher worker failed to initialize models', + { threadId, jobId: job.jobId }, + err instanceof Error ? err : new Error(String(err)), + ); + // Notify parent and exit or mark job failed + parentPort!.postMessage({ + type: 'error', + jobId: job.jobId, + error: `Model initialisation failed: ${err instanceof Error ? 
err.message : String(err)}`, + }); + // still send job_completed so worker pool can continue + parentPort!.postMessage({ type: 'job_completed', jobId: job.jobId }); + return; + } workerLogger(job.jobId, 'debug', `Starting matching job with ${job.tasks.length} tasks`, { threadId, diff --git a/src/competence-matcher/src/worker/worker-manager.ts b/src/competence-matcher/src/worker/worker-manager.ts index b96257f5e..b036e1696 100644 --- a/src/competence-matcher/src/worker/worker-manager.ts +++ b/src/competence-matcher/src/worker/worker-manager.ts @@ -78,16 +78,12 @@ class WorkerPool { busyWorkers: this.busyWorkers.size, }); - // Set initial death timer (longer timeout for initialization) + // Set initial death timer const deathTimer = setTimeout(() => this.killWorker(worker), workerDeathTimeout); this.workerDeathTimers.set(worker, deathTimer); worker.on('message', (message: any) => { - if (message.type === 'ready') { - this.handleWorkerReady(worker); - } else if (message.type === 'initialization_failed') { - this.handleWorkerInitializationFailed(worker, message.error); - } else if (message.type === 'heartbeat') { + if (message.type === 'heartbeat') { this.handleHeartbeat(worker); } else if (message.type === 'log') { // Handle log messages from workers @@ -99,39 +95,6 @@ class WorkerPool { worker.once('exit', () => this.handleWorkerFailure(worker, new Error('Worker exited'))); } - private handleWorkerReady(worker: Worker): void { - logger.info('worker', `${this.workerType} worker ready for jobs`, { - workerType: this.workerType, - threadId: worker.threadId, - totalWorkers: this.workers.length, - poolSize: this.poolSize, - }); - - // Add to available pool now that initialization is complete - this.availableWorkers.add(worker); - - // Reset death timer to normal cadence - const existingTimer = this.workerDeathTimers.get(worker); - if (existingTimer) clearTimeout(existingTimer); - - const newTimer = setTimeout(() => this.killWorker(worker), workerDeathTimeout); - 
this.workerDeathTimers.set(worker, newTimer); - - // Try to process any queued jobs - this.processNextJob(); - } - - private handleWorkerInitializationFailed(worker: Worker, error: string): void { - logger.error('worker', `${this.workerType} worker initialization failed`, new Error(error), { - workerType: this.workerType, - threadId: worker.threadId, - }); - - // Remove the failed worker and create a replacement - this.removeWorker(worker); - this.createWorker(); - } - private handleHeartbeat(worker: Worker): void { logger.debug('worker', `Heartbeat received from ${this.workerType} worker`, { workerType: this.workerType, @@ -146,9 +109,8 @@ class WorkerPool { const newTimer = setTimeout(() => this.killWorker(worker), workerDeathTimeout); this.workerDeathTimers.set(worker, newTimer); - // Only mark as available if not busy (worker should already be in pool from 'ready' signal) - // This handles the case where a worker becomes available again after completing a job - if (!this.busyWorkers.has(worker) && !this.availableWorkers.has(worker)) { + // Mark available if not busy + if (!this.busyWorkers.has(worker)) { this.availableWorkers.add(worker); this.processNextJob(); } From dd87a1d6661ef311e9e9f922febb095acc5e292e Mon Sep 17 00:00:00 2001 From: Maxi <76120220+MaxiLein@users.noreply.github.com> Date: Wed, 5 Nov 2025 14:49:30 +0100 Subject: [PATCH 39/48] update @huggingface/transformers dependency to version 3.7.6 --- src/competence-matcher/package.json | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/competence-matcher/package.json b/src/competence-matcher/package.json index 7aa530aa7..8030bb2c0 100644 --- a/src/competence-matcher/package.json +++ b/src/competence-matcher/package.json @@ -25,7 +25,7 @@ }, "homepage": "https://github.com/PROCEED-Labs/proceed#readme", "dependencies": { - "@huggingface/transformers": "^3.5.2", + "@huggingface/transformers": "^3.7.6", "dotenv": "^17.2.1", "express": "^5.1.0", "ollama": "^0.5.16", @@ -41,4 +41,4 @@ 
"engines": { "node": ">=23.5.0" } -} +} \ No newline at end of file From 5fcadb05e153d3a943d29fe4566fa3885863a417 Mon Sep 17 00:00:00 2001 From: Maxi <76120220+MaxiLein@users.noreply.github.com> Date: Wed, 5 Nov 2025 15:47:50 +0100 Subject: [PATCH 40/48] refactor: update worker configuration parameters and enhance job recovery logic --- src/competence-matcher/src/config.ts | 5 ++- src/competence-matcher/src/utils/worker.ts | 18 +++++++-- src/competence-matcher/src/worker/embedder.ts | 7 +++- src/competence-matcher/src/worker/matcher.ts | 12 +++++- .../src/worker/worker-manager.ts | 38 +++++++++++++++++-- 5 files changed, 69 insertions(+), 11 deletions(-) diff --git a/src/competence-matcher/src/config.ts b/src/competence-matcher/src/config.ts index 5a2ae706a..a5065a02f 100644 --- a/src/competence-matcher/src/config.ts +++ b/src/competence-matcher/src/config.ts @@ -21,8 +21,8 @@ export const config = { splittingSymbol: process.env.SPLITTING_SYMBOL || '', embeddingWorkers: parseInt(process.env.EMBEDDING_WORKERS || '1', 10), // Number of embedding workers to keep alive matchingWorkers: parseInt(process.env.MATCHING_WORKERS || '1', 10), // Number of matching workers to keep alive - workerHeartbeatInterval: parseInt(process.env.WORKER_HEARTBEAT_INTERVAL || '30', 10) * 1_000, // Worker heartbeat interval in seconds (converted to ms) - how often workers send heartbeats - workerDeathTimeout: parseInt(process.env.WORKER_DEATH_TIMEOUT || '45', 10) * 1_000, // Worker death timeout in seconds (converted to ms) - how long to wait before considering worker dead + workerHeartbeatInterval: parseInt(process.env.WORKER_HEARTBEAT_INTERVAL || '60', 10) * 1_000, // Worker heartbeat interval in seconds (converted to ms) - how often workers send heartbeats + workerDeathTimeout: parseInt(process.env.WORKER_DEATH_TIMEOUT || '240', 10) * 1_000, // Worker death timeout in seconds (converted to ms) - how long to wait before considering worker dead maxJobTime: parseInt(process.env.MAX_JOB_TIME 
|| '600', 10) * 1_000, // converted from seconds to milliseconds logLevel: process.env.LOG_LEVEL || 'INFO', // Levels: 'DEBUG', 'INFO', 'WARN', 'ERROR' logTypes: process.env.LOG_TYPES || 'server,request,worker,database,model,system', @@ -39,6 +39,7 @@ export const config = { 1_000, // Maximum time to wait for all worker pools to become ready at startup (seconds to ms) maxWorkerRetries: parseInt(process.env.MAX_WORKER_RETRIES || '3', 10), // Maximum worker restart attempts before escalating to ERROR workerRetryWindow: parseInt(process.env.WORKER_RETRY_WINDOW || '300', 10) * 1_000, // Time window in seconds to reset retry count (converted to ms) + jobMaxRetries: parseInt(process.env.JOB_MAX_RETRIES || '3', 10), // Maximum number of retries for a job before failing it maxOllamaRetries: parseInt(process.env.MAX_OLLAMA_RETRIES || '5', 10), // Maximum model pull retry attempts ollamaRetryDelay: parseInt(process.env.OLLAMA_RETRY_DELAY || '30', 10) * 1_000, // Base delay between retries in seconds (converted to ms) ollamaRetryBackoff: parseFloat(process.env.OLLAMA_RETRY_BACKOFF || '1.5'), // Exponential backoff multiplier diff --git a/src/competence-matcher/src/utils/worker.ts b/src/competence-matcher/src/utils/worker.ts index 51791a23d..9715884c3 100644 --- a/src/competence-matcher/src/utils/worker.ts +++ b/src/competence-matcher/src/utils/worker.ts @@ -1,6 +1,6 @@ import * as fs from 'node:fs'; import * as path from 'node:path'; -import { Worker, parentPort } from 'worker_threads'; +import { Worker, parentPort, threadId } from 'worker_threads'; import VectorDataBase from '../db/db'; import { getDB } from './db'; import { config } from '../config'; @@ -122,8 +122,6 @@ export function startHeartbeat(workerType: string, intervalMs: number = 20000): throw new Error('startHeartbeat can only be called from worker threads'); } - const { threadId } = require('worker_threads'); - const sendHeartbeat = () => { workerLogger('system', 'debug', `${workerType} worker sending 
heartbeat`, { workerType, @@ -149,3 +147,17 @@ export function startHeartbeat(workerType: string, intervalMs: number = 20000): clearInterval(heartbeatInterval); }; } + +export function sendHeartbeat(workerType: string): void { + if (!parentPort) { + throw new Error('sendHeartbeat can only be called from worker threads'); + } + + parentPort.postMessage({ + type: 'heartbeat', + workerType, + threadId, + timestamp: Date.now(), + source: 'manual', + }); +} diff --git a/src/competence-matcher/src/worker/embedder.ts b/src/competence-matcher/src/worker/embedder.ts index 310dc096b..082764e55 100644 --- a/src/competence-matcher/src/worker/embedder.ts +++ b/src/competence-matcher/src/worker/embedder.ts @@ -1,7 +1,7 @@ import { parentPort, threadId } from 'worker_threads'; import Embedding from '../tasks/embedding'; import { splitSemantically } from '../tasks/semantic-split'; -import { withJobUpdates, workerLogger, startHeartbeat } from '../utils/worker'; +import { withJobUpdates, workerLogger, startHeartbeat, sendHeartbeat } from '../utils/worker'; import { EmbeddingJob } from '../utils/types'; import { config } from '../config'; @@ -75,10 +75,13 @@ parentPort.on('message', async (message: any) => { // Process each embedding task for (const { listId, resourceId, competenceId, text, type } of work) { + sendHeartbeat('embedder'); try { // Generate embedding for the text const [vector] = await Embedding.embed(text); + sendHeartbeat('embedder'); + workerLogger(jobId, 'debug', `Generated embedding for ${type} text`, { threadId, competenceId, @@ -94,6 +97,8 @@ parentPort.on('message', async (message: any) => { type, embedding: vector, }); + + sendHeartbeat('embedder'); } catch (error) { // Log the error but continue with other tasks workerLogger( diff --git a/src/competence-matcher/src/worker/matcher.ts b/src/competence-matcher/src/worker/matcher.ts index e7d62ddfc..8725eb100 100644 --- a/src/competence-matcher/src/worker/matcher.ts +++ 
b/src/competence-matcher/src/worker/matcher.ts @@ -1,6 +1,6 @@ import { parentPort, threadId } from 'worker_threads'; import Embedding from '../tasks/embedding'; -import { withJobUpdates, workerLogger, startHeartbeat } from '../utils/worker'; +import { withJobUpdates, workerLogger, startHeartbeat, sendHeartbeat } from '../utils/worker'; import { addReason } from '../tasks/reason'; import { Match, MatchingJob } from '../utils/types'; import ZeroShot, { labels } from '../tasks/semantic-zeroshot'; @@ -90,11 +90,15 @@ parentPort.on('message', async (message: any) => { continue; // Skip tasks without description } + sendHeartbeat('matcher'); + try { // Generate embedding for the task description // Todo: Handle embedding via the dedicated embedding worker const [vector] = await Embedding.embed(description); + sendHeartbeat('matcher'); + // Search for matches in the competence database let matches: Match[] = db.searchEmbedding(vector, { filter: { @@ -103,6 +107,8 @@ parentPort.on('message', async (message: any) => { }, }); + sendHeartbeat('matcher'); + // Process each match for (const match of matches) { let flag = 'neutral'; // Default flag @@ -117,9 +123,13 @@ parentPort.on('message', async (message: any) => { // Get Alignment via Zero-Shot const sentiment = await ZeroShot.nliBiDirectional(description, match.text); + sendHeartbeat('matcher'); + const contradiction = await ZeroShot.contradictionCheck(description, match.text); const alignment = await ZeroShot.alignmentCheck(description, match.text); + sendHeartbeat('matcher'); + // console.log('task: ', description); // console.log('capability: ', match.text); // console.log('alignment: ', alignment); diff --git a/src/competence-matcher/src/worker/worker-manager.ts b/src/competence-matcher/src/worker/worker-manager.ts index b036e1696..0af56200e 100644 --- a/src/competence-matcher/src/worker/worker-manager.ts +++ b/src/competence-matcher/src/worker/worker-manager.ts @@ -7,7 +7,13 @@ import { getLogger } from 
'../utils/logger'; import { addReason } from '../tasks/reason'; import { getDB } from '../utils/db'; -const { embeddingWorkers, matchingWorkers, workerHeartbeatInterval, workerDeathTimeout } = config; +const { + embeddingWorkers, + matchingWorkers, + workerHeartbeatInterval, + workerDeathTimeout, + jobMaxRetries, +} = config; const logger = getLogger(); /** @@ -23,10 +29,12 @@ class WorkerPool { private workersBeingReplaced: Set = new Set(); // Prevent double replacement using threadId private readonly workerType: workerTypes; private readonly poolSize: number; + private readonly maxJobRetries: number; constructor(workerType: workerTypes, poolSize: number) { this.workerType = workerType; this.poolSize = poolSize; + this.maxJobRetries = Math.max(0, jobMaxRetries); logger.info('worker', `Initializing ${workerType} pool with ${poolSize} workers`, { workerType, @@ -160,6 +168,21 @@ class WorkerPool { busyWorkersBefore: this.busyWorkers.size, }); + const activeJob = this.activeJobs.get(worker); + if (activeJob) { + this.activeJobs.delete(worker); + this.busyWorkers.delete(worker); + + logger.warn('worker', `Recovering job from unresponsive ${this.workerType} worker`, { + workerType: this.workerType, + threadId: worker.threadId, + jobId: activeJob.job.jobId, + retryCount: activeJob.retryCount, + }); + + this.handleJobFailure(activeJob, new Error('Worker heartbeat timeout')); + } + this.removeWorker(worker); logger.info('worker', `Creating replacement ${this.workerType} worker after timeout`, { @@ -180,6 +203,8 @@ class WorkerPool { }); this.workersBeingReplaced.delete(worker.threadId); + + this.processNextJob(); } private handleWorkerFailure(worker: Worker, error: Error): void { @@ -215,6 +240,7 @@ class WorkerPool { if (activeJob) { // Recover the job that was being processed this.activeJobs.delete(worker); + this.busyWorkers.delete(worker); logger.warn('worker', `Recovering job from failed ${this.workerType} worker`, { workerType: this.workerType, threadId: 
worker.threadId, @@ -248,6 +274,10 @@ class WorkerPool { ); this.workersBeingReplaced.delete(worker.threadId); + + if (activeJob) { + this.processNextJob(); + } } private removeWorker(worker: Worker): void { @@ -383,13 +413,13 @@ class WorkerPool { private handleJobFailure(queueItem: JobQueueItem, error: Error): void { const { job, resolve, reject, retryCount } = queueItem; - if (retryCount < 2) { + if (retryCount < this.maxJobRetries) { // Requeue job with incremented retry count logger.info('worker', `Retrying ${this.workerType} job`, { workerType: this.workerType, jobId: job.jobId, retryCount: retryCount + 1, - maxRetries: 2, + maxRetries: this.maxJobRetries, error: error.message, }); @@ -400,7 +430,7 @@ class WorkerPool { retryCount: retryCount + 1, }); } else { - // Final failure after 2 retries + // Final failure after reaching retry limit logger.error( 'worker', `${this.workerType} job failed permanently after max retries`, From 2d268cf6017d76c70e41f54a3c28ea52d4ceb256 Mon Sep 17 00:00:00 2001 From: Maxi <76120220+MaxiLein@users.noreply.github.com> Date: Wed, 5 Nov 2025 16:39:28 +0100 Subject: [PATCH 41/48] implement task embedding management and update job processing workflow --- src/competence-matcher/src/db/db.ts | 55 +++++ .../src/middleware/match.ts | 31 ++- .../src/tasks/matching-pipeline.ts | 49 +++++ src/competence-matcher/src/utils/types.ts | 11 +- src/competence-matcher/src/worker/embedder.ts | 207 ++++++++++++------ src/competence-matcher/src/worker/matcher.ts | 55 +++-- 6 files changed, 307 insertions(+), 101 deletions(-) create mode 100644 src/competence-matcher/src/tasks/matching-pipeline.ts diff --git a/src/competence-matcher/src/db/db.ts b/src/competence-matcher/src/db/db.ts index c92974a24..a82b097b7 100644 --- a/src/competence-matcher/src/db/db.ts +++ b/src/competence-matcher/src/db/db.ts @@ -81,6 +81,16 @@ class VectorDataBase { ON match_results(job_id); `); + // task embeddings per job (transient, cleared after matching) + this.db.exec(` 
+ CREATE TABLE IF NOT EXISTS task_embedding ( + job_id TEXT NOT NULL REFERENCES jobs(id) ON DELETE CASCADE, + task_id TEXT NOT NULL, + embedding FLOAT32[${this.embeddingDim}], + PRIMARY KEY (job_id, task_id) + ); + `); + // resource_list this.db.exec(` CREATE TABLE IF NOT EXISTS resource_list ( @@ -863,6 +873,51 @@ class VectorDataBase { })); return result; } + + /** + * Store or update the embedding vector for a specific task within a job. + */ + public upsertTaskEmbedding(jobId: string, taskId: string, embedding: number[]): void { + if (embedding.length !== this.embeddingDim) { + throw new Error(`Task embedding must have length ${this.embeddingDim}`); + } + + this.db + .prepare( + ` + INSERT INTO task_embedding (job_id, task_id, embedding) + VALUES (?, ?, vec_f32(?)) + ON CONFLICT(job_id, task_id) + DO UPDATE SET embedding = excluded.embedding + `, + ) + .run(jobId, taskId, new Float32Array(embedding)); + } + + /** + * Retrieve the embedding for a given task within a job. + */ + public getTaskEmbedding(jobId: string, taskId: string): number[] | null { + const row = this.db + .prepare( + ` + SELECT embedding + FROM task_embedding + WHERE job_id = ? AND task_id = ? + `, + ) + .get(jobId, taskId) as { embedding: Float32Array } | undefined; + + if (!row) return null; + return Array.from(row.embedding); + } + + /** + * Remove all task embeddings associated with a job. 
+ */ + public deleteTaskEmbeddings(jobId: string): void { + this.db.prepare(`DELETE FROM task_embedding WHERE job_id = ?`).run(jobId); + } } export default VectorDataBase; diff --git a/src/competence-matcher/src/middleware/match.ts b/src/competence-matcher/src/middleware/match.ts index 15dfb1a74..e85d36cb7 100644 --- a/src/competence-matcher/src/middleware/match.ts +++ b/src/competence-matcher/src/middleware/match.ts @@ -1,7 +1,6 @@ import { Request, Response, NextFunction } from 'express'; import { PATHS } from '../server'; import { getDB } from '../utils/db'; -import workerManager from '../worker/worker-manager'; import { CompetenceInput, GroupedMatchResults, @@ -19,6 +18,7 @@ import { CompetenceMatcherError, } from '../utils/errors'; import { getLogger } from '../utils/logger'; +import { scheduleMatchingPipeline } from '../tasks/matching-pipeline'; const logger = getLogger(); @@ -130,6 +130,16 @@ export function matchCompetenceList(req: Request, res: Response, next: NextFunct ); } + try { + db.updateJobStatus(jobId, 'preprocessing'); + } catch (error) { + throw new DatabaseError( + 'updateJobStatus', + error instanceof Error ? error : new Error(String(error)), + requestId, + ); + } + const job: MatchingJob = { jobId, dbName: req.dbName!, @@ -160,7 +170,7 @@ export function matchCompetenceList(req: Request, res: Response, next: NextFunct }; try { - workerManager.enqueue(job, 'matcher'); + scheduleMatchingPipeline(job, job.tasks, requestId); } catch (error) { throw new CompetenceMatcherError( `Failed to enqueue matching job: ${error instanceof Error ? 
error.message : String(error)}`, @@ -176,7 +186,7 @@ export function matchCompetenceList(req: Request, res: Response, next: NextFunct .setHeader('Location', `${PATHS.match}/jobs/${jobId}`) // Accepted response .status(202) - .json({ jobId, status: 'pending' }); + .json({ jobId, status: 'preprocessing' }); return; } @@ -218,19 +228,6 @@ export function matchCompetenceList(req: Request, res: Response, next: NextFunct // Chain the matching job to start after embedding completes embeddingResult.promise .then(() => { - // Embedding is done, now update matching job status and start it - try { - db.updateJobStatus(matchingJobId, 'pending'); - } catch (error) { - logger.error( - 'system', - 'Failed to update matching job status to pending', - error instanceof Error ? error : new Error(String(error)), - {}, - requestId, - ); - } - const matchingJob: MatchingJob = { jobId: matchingJobId, dbName: req.dbName!, @@ -260,7 +257,7 @@ export function matchCompetenceList(req: Request, res: Response, next: NextFunct }), }; - workerManager.enqueue(matchingJob, 'matcher'); + scheduleMatchingPipeline(matchingJob, matchingJob.tasks, requestId); }) .catch((error) => { // Embedding failed, mark matching job as failed too diff --git a/src/competence-matcher/src/tasks/matching-pipeline.ts b/src/competence-matcher/src/tasks/matching-pipeline.ts new file mode 100644 index 000000000..c653a3515 --- /dev/null +++ b/src/competence-matcher/src/tasks/matching-pipeline.ts @@ -0,0 +1,49 @@ +import workerManager from '../worker/worker-manager'; +import { MatchingJob, MatchingTask, TaskEmbeddingJob } from '../utils/types'; +import { getLogger } from '../utils/logger'; +import { getDB } from '../utils/db'; + +/** + * Coordinates the two-step matching workflow by first embedding tasks and then + * handing the job over to the matcher worker pool. 
+ */ +export function scheduleMatchingPipeline( + matchingJob: MatchingJob, + tasks: MatchingTask[], + requestId?: string, +): void { + const logger = getLogger(); + + const taskEmbeddingJob: TaskEmbeddingJob = { + jobId: matchingJob.jobId, + dbName: matchingJob.dbName, + mode: 'task', + tasks, + }; + + workerManager + .enqueue(taskEmbeddingJob, 'embedder') + .then(() => workerManager.enqueue(matchingJob, 'matcher')) + .catch((error) => { + logger.error( + 'system', + 'Task embedding stage failed before matcher enqueue', + error instanceof Error ? error : new Error(String(error)), + { jobId: matchingJob.jobId }, + requestId, + ); + + try { + const db = getDB(matchingJob.dbName); + db.updateJobStatus(matchingJob.jobId, 'failed'); + } catch (dbError) { + logger.error( + 'system', + 'Failed to mark job as failed after embedding stage error', + dbError instanceof Error ? dbError : new Error(String(dbError)), + { jobId: matchingJob.jobId }, + requestId, + ); + } + }); +} diff --git a/src/competence-matcher/src/utils/types.ts b/src/competence-matcher/src/utils/types.ts index 6f4ef92c0..825557b87 100644 --- a/src/competence-matcher/src/utils/types.ts +++ b/src/competence-matcher/src/utils/types.ts @@ -119,14 +119,23 @@ export interface Job { dbName: string; } -export interface EmbeddingJob extends Job { +export interface ResourceEmbeddingJob extends Job { + mode?: 'resource'; tasks: EmbeddingTask[]; } +export interface TaskEmbeddingJob extends Job { + mode: 'task'; + tasks: MatchingTask[]; +} + +export type EmbeddingJob = ResourceEmbeddingJob | TaskEmbeddingJob; + export interface MatchingJob extends Job { listId?: string; // Which List to match against resourceId?: string; // Optional: If matching against a single resource tasks: MatchingTask[]; // Tasks to match + taskEmbeddings?: Record; // Optional precomputed task embeddings keyed by taskId } export type ResourceRanking = { diff --git a/src/competence-matcher/src/worker/embedder.ts 
b/src/competence-matcher/src/worker/embedder.ts index 082764e55..789a44383 100644 --- a/src/competence-matcher/src/worker/embedder.ts +++ b/src/competence-matcher/src/worker/embedder.ts @@ -1,8 +1,8 @@ import { parentPort, threadId } from 'worker_threads'; import Embedding from '../tasks/embedding'; -import { splitSemantically } from '../tasks/semantic-split'; import { withJobUpdates, workerLogger, startHeartbeat, sendHeartbeat } from '../utils/worker'; -import { EmbeddingJob } from '../utils/types'; +import { EmbeddingJob, ResourceEmbeddingJob, TaskEmbeddingJob } from '../utils/types'; +import { getDB } from '../utils/db'; import { config } from '../config'; /** @@ -36,6 +36,7 @@ parentPort.on('message', async (message: any) => { threadId, jobId: job.jobId, taskCount: job.tasks?.length || 0, + mode: job.mode ?? 'resource', }); // ensure models are initialised (but do not run this for health_check) @@ -60,96 +61,178 @@ parentPort.on('message', async (message: any) => { return; } - workerLogger(job.jobId, 'debug', `Starting embedding job with ${job.tasks.length} tasks`, { + try { + if (job.mode === 'task') { + await processTaskEmbeddingJob(job as TaskEmbeddingJob); + } else { + await processResourceEmbeddingJob(job as ResourceEmbeddingJob); + } + + workerLogger(job.jobId, 'debug', `Embedding job completed`, { + threadId, + taskCount: job.tasks.length, + mode: job.mode ?? 'resource', + }); + } catch (error) { + workerLogger( + job.jobId, + 'debug', + `Embedding job failed`, + { + threadId, + mode: job.mode ?? 'resource', + }, + error instanceof Error ? 
error : new Error(String(error)), + ); + } finally { + parentPort!.postMessage({ + type: 'job_completed', + jobId: job.jobId, + }); + } +}); + +workerLogger('system', 'debug', `Embedder worker thread ready`, { + threadId, +}); + +async function processResourceEmbeddingJob(job: ResourceEmbeddingJob): Promise { + workerLogger( + job.jobId, + 'debug', + `Starting resource embedding job with ${job.tasks.length} tasks`, + { + threadId, + taskCount: job.tasks.length, + }, + ); + + await withJobUpdates(job, async (db, { tasks, jobId }) => { + let work = tasks; + + // TODO: Re-enable semantic splitting once the worker crash issue is resolved + // work = await splitSemantically(tasks); + + for (const { listId, resourceId, competenceId, text, type } of work) { + sendHeartbeat('embedder'); + try { + const [vector] = await Embedding.embed(text); + + sendHeartbeat('embedder'); + + workerLogger(jobId, 'debug', `Generated embedding for ${type} text`, { + threadId, + competenceId, + textLength: text.length, + }); + + db.upsertEmbedding({ + listId, + resourceId, + competenceId, + text, + type, + embedding: vector, + }); + + sendHeartbeat('embedder'); + } catch (error) { + workerLogger( + jobId, + 'error', + `Failed to process embedding task`, + { + threadId, + competenceId, + type, + }, + error instanceof Error ? error : new Error(String(error)), + ); + + parentPort!.postMessage({ + type: 'error', + jobId, + error: `Failed to process embedding task: ${error instanceof Error ? 
error.message : String(error)}`, + }); + } + } + }); +} + +async function processTaskEmbeddingJob(job: TaskEmbeddingJob): Promise { + workerLogger(job.jobId, 'debug', `Starting task embedding job with ${job.tasks.length} tasks`, { threadId, taskCount: job.tasks.length, }); - try { - await withJobUpdates(job, async (db, { tasks, jobId }) => { - let work = tasks; + const dbInstance = getDB(job.dbName); - // TODO: Re-enable semantic splitting once the worker crash issue is resolved - // Split tasks semantically - // work = await splitSemantically(tasks); + await withJobUpdates( + job, + async (db, { tasks, jobId }) => { + for (const task of tasks) { + const { taskId, description } = task; + if (!description) { + workerLogger(jobId, 'warn', 'Skipping task without description for embedding', { + threadId, + taskId, + }); + continue; + } - // Process each embedding task - for (const { listId, resourceId, competenceId, text, type } of work) { sendHeartbeat('embedder'); + try { - // Generate embedding for the text - const [vector] = await Embedding.embed(text); + const [vector] = await Embedding.embed(description); sendHeartbeat('embedder'); - workerLogger(jobId, 'debug', `Generated embedding for ${type} text`, { + workerLogger(jobId, 'debug', 'Generated task embedding', { threadId, - competenceId, - textLength: text.length, + taskId, + textLength: description.length, }); - // Store embedding in database - db.upsertEmbedding({ - listId, - resourceId, - competenceId, - text, - type, - embedding: vector, - }); + db.upsertTaskEmbedding(jobId, taskId, vector); sendHeartbeat('embedder'); } catch (error) { - // Log the error but continue with other tasks workerLogger( jobId, 'error', - `Failed to process embedding task`, + 'Failed to generate task embedding', { threadId, - competenceId, - type, + taskId, }, error instanceof Error ? 
error : new Error(String(error)), ); - // Individual task errors don't fail the entire job - // Send error notification but continue processing parentPort!.postMessage({ type: 'error', jobId, - error: `Failed to process embedding task: ${error instanceof Error ? error.message : String(error)}`, + error: `Failed to process task ${taskId}: ${error instanceof Error ? error.message : String(error)}`, }); } } - }); - - // Job completed successfully - workerLogger(job.jobId, 'debug', `Embedding job completed`, { - threadId, - taskCount: job.tasks.length, - }); - } catch (error) { - // Job-level error - already handled by withJobUpdates - // Just log it for worker context - workerLogger( - job.jobId, - 'debug', - `Embedding job failed`, - { - threadId, + }, + { + onStart: () => { + dbInstance.updateJobStatus(job.jobId, 'preprocessing'); + parentPort!.postMessage({ + type: 'status', + jobId: job.jobId, + status: 'preprocessing', + }); }, - error instanceof Error ? error : new Error(String(error)), - ); - } finally { - // Always notify job completion so worker can process next job - parentPort!.postMessage({ - type: 'job_completed', - jobId: job.jobId, - }); - } -}); - -workerLogger('system', 'debug', `Embedder worker thread ready`, { - threadId, -}); + onDone: () => { + dbInstance.updateJobStatus(job.jobId, 'pending'); + parentPort!.postMessage({ + type: 'status', + jobId: job.jobId, + status: 'pending', + }); + }, + }, + ); +} diff --git a/src/competence-matcher/src/worker/matcher.ts b/src/competence-matcher/src/worker/matcher.ts index 8725eb100..9fd96bf85 100644 --- a/src/competence-matcher/src/worker/matcher.ts +++ b/src/competence-matcher/src/worker/matcher.ts @@ -1,11 +1,12 @@ import { parentPort, threadId } from 'worker_threads'; -import Embedding from '../tasks/embedding'; import { withJobUpdates, workerLogger, startHeartbeat, sendHeartbeat } from '../utils/worker'; import { addReason } from '../tasks/reason'; import { Match, MatchingJob } from '../utils/types'; 
import ZeroShot, { labels } from '../tasks/semantic-zeroshot'; // import CrossEncoder from '../tasks/cross-encode'; import { config } from '../config'; +import { getDB } from '../utils/db'; +import Embedding from '../tasks/embedding'; /** * New matcher worker that stays alive and processes jobs sequentially @@ -18,7 +19,6 @@ let modelsInitialised = false; async function ensureModelsInitialised() { if (modelsInitialised) return; try { - await Embedding.getInstance(); await ZeroShot.getInstance(); modelsInitialised = true; workerLogger('system', 'debug', 'Matcher worker online', { threadId }); @@ -93,23 +93,32 @@ parentPort.on('message', async (message: any) => { sendHeartbeat('matcher'); try { - // Generate embedding for the task description - // Todo: Handle embedding via the dedicated embedding worker - const [vector] = await Embedding.embed(description); + // Retrieve stored embedding for the task + let vector = job.taskEmbeddings?.[taskId] ?? db.getTaskEmbedding(jobId, taskId); + + if (!vector) { + workerLogger(jobId, 'warn', 'Task embedding missing, computing on matcher worker', { + threadId, + taskId, + }); + + const [fallback] = await Embedding.embed(description); + vector = fallback; + } + + if (!vector) { + throw new Error(`Unable to obtain embedding for task ${taskId}`); + } sendHeartbeat('matcher'); // Search for matches in the competence database - let matches: Match[] = db.searchEmbedding(vector, { + const matches: Match[] = db.searchEmbedding(vector, { filter: { listId: listIdFilter, resourceId: resourceIdFilter, // Optional: If matching against a single resource }, }); - - sendHeartbeat('matcher'); - - // Process each match for (const match of matches) { let flag = 'neutral'; // Default flag @@ -120,7 +129,6 @@ parentPort.on('message', async (message: any) => { config.matchDistanceMultiplier, ); - // Get Alignment via Zero-Shot const sentiment = await ZeroShot.nliBiDirectional(description, match.text); sendHeartbeat('matcher'); @@ -130,12 +138,6 @@ 
parentPort.on('message', async (message: any) => { sendHeartbeat('matcher'); - // console.log('task: ', description); - // console.log('capability: ', match.text); - // console.log('alignment: ', alignment); - // console.log('________________________________________'); - - // First: Contradicting? if ( sentiment.ranking[0] == 'contradict' || sentiment.contradict > config.contradictionThreshold || @@ -143,22 +145,18 @@ parentPort.on('message', async (message: any) => { ) { flag = 'contradicting'; newDistance = 0.0; - // Second: Aligning? } else if ( sentiment.entail > config.entailmentThreshold && match.distance > config.alignmentDistanceThreshold && alignment.aligning ) { flag = 'aligning'; - // Boost similarity-based distance newDistance = Math.min(1, newDistance * config.alignmentBoostMultiplier); } else { flag = 'neutral'; - // Reduce distance for neutral newDistance *= config.neutralReductionMultiplier; } - // Store match result for reasoning workaround matchResults[description].push({ jobId, taskId, @@ -224,6 +222,21 @@ parentPort.on('message', async (message: any) => { error instanceof Error ? error : new Error(String(error)), ); } finally { + try { + const cleanupDb = getDB(job.dbName); + cleanupDb.deleteTaskEmbeddings(job.jobId); + } catch (error) { + workerLogger( + job.jobId, + 'warn', + 'Failed to clean up task embeddings after matching job', + { + threadId, + }, + error instanceof Error ? 
error : new Error(String(error)), + ); + } + // Always notify job completion so worker can process next job parentPort!.postMessage({ type: 'job_completed', From 8683b3af424e1aaaec571c12510f2eef536d7675 Mon Sep 17 00:00:00 2001 From: Maxi <76120220+MaxiLein@users.noreply.github.com> Date: Wed, 5 Nov 2025 17:19:01 +0100 Subject: [PATCH 42/48] minor fix --- src/competence-matcher/src/db/db.ts | 42 ++++++++++++++++++-- src/competence-matcher/src/worker/matcher.ts | 17 +++----- 2 files changed, 45 insertions(+), 14 deletions(-) diff --git a/src/competence-matcher/src/db/db.ts b/src/competence-matcher/src/db/db.ts index a82b097b7..af2a97fd1 100644 --- a/src/competence-matcher/src/db/db.ts +++ b/src/competence-matcher/src/db/db.ts @@ -906,10 +906,46 @@ class VectorDataBase { WHERE job_id = ? AND task_id = ? `, ) - .get(jobId, taskId) as { embedding: Float32Array } | undefined; + .get(jobId, taskId) as { embedding: unknown } | undefined; - if (!row) return null; - return Array.from(row.embedding); + if (!row || row.embedding == null) return null; + + let vector: number[] | null = null; + const raw = row.embedding as any; + + if (raw instanceof Float32Array) { + vector = Array.from(raw); + } else if (Buffer.isBuffer(raw)) { + // sqlite-vec returns the FLOAT32 column as a Buffer; reinterpret it as Float32 values + const floatView = new Float32Array( + raw.buffer, + raw.byteOffset, + raw.byteLength / Float32Array.BYTES_PER_ELEMENT, + ); + vector = Array.from(floatView); + } else if (ArrayBuffer.isView(raw)) { + const view = raw as ArrayBufferView; + const floatView = new Float32Array( + view.buffer, + view.byteOffset, + view.byteLength / Float32Array.BYTES_PER_ELEMENT, + ); + vector = Array.from(floatView); + } else if (raw instanceof ArrayBuffer) { + vector = Array.from(new Float32Array(raw)); + } + + if (!vector) { + throw new Error('Unsupported embedding format fetched from task_embedding table'); + } + + if (vector.length !== this.embeddingDim) { + throw new Error( + 
`Task embedding length mismatch: expected ${this.embeddingDim}, received ${vector.length}`, + ); + } + + return vector; } /** diff --git a/src/competence-matcher/src/worker/matcher.ts b/src/competence-matcher/src/worker/matcher.ts index 9fd96bf85..014ab2006 100644 --- a/src/competence-matcher/src/worker/matcher.ts +++ b/src/competence-matcher/src/worker/matcher.ts @@ -6,7 +6,6 @@ import ZeroShot, { labels } from '../tasks/semantic-zeroshot'; // import CrossEncoder from '../tasks/cross-encode'; import { config } from '../config'; import { getDB } from '../utils/db'; -import Embedding from '../tasks/embedding'; /** * New matcher worker that stays alive and processes jobs sequentially @@ -94,20 +93,16 @@ parentPort.on('message', async (message: any) => { try { // Retrieve stored embedding for the task - let vector = job.taskEmbeddings?.[taskId] ?? db.getTaskEmbedding(jobId, taskId); + const vector = job.taskEmbeddings?.[taskId] ?? db.getTaskEmbedding(jobId, taskId); if (!vector) { - workerLogger(jobId, 'warn', 'Task embedding missing, computing on matcher worker', { - threadId, - taskId, - }); - - const [fallback] = await Embedding.embed(description); - vector = fallback; + throw new Error(`No embedding stored for task ${taskId}`); } - if (!vector) { - throw new Error(`Unable to obtain embedding for task ${taskId}`); + if (vector.length !== config.embeddingDim) { + throw new Error( + `Embedding length mismatch for task ${taskId}: expected ${config.embeddingDim}, received ${vector.length}`, + ); } sendHeartbeat('matcher'); From a20720c007086c2bfd3a887846ecbf0becea0b53 Mon Sep 17 00:00:00 2001 From: Maxi <76120220+MaxiLein@users.noreply.github.com> Date: Wed, 5 Nov 2025 17:57:46 +0100 Subject: [PATCH 43/48] testing locks for onnx runtime error --- src/competence-matcher/src/utils/onnx-lock.ts | 48 +++++++++++++++++++ src/competence-matcher/src/utils/worker.ts | 6 ++- src/competence-matcher/src/worker/embedder.ts | 9 ++-- src/competence-matcher/src/worker/matcher.ts | 15 
++++-- .../src/worker/worker-manager.ts | 16 +++++-- 5 files changed, 81 insertions(+), 13 deletions(-) create mode 100644 src/competence-matcher/src/utils/onnx-lock.ts diff --git a/src/competence-matcher/src/utils/onnx-lock.ts b/src/competence-matcher/src/utils/onnx-lock.ts new file mode 100644 index 000000000..9804b3511 --- /dev/null +++ b/src/competence-matcher/src/utils/onnx-lock.ts @@ -0,0 +1,48 @@ +import { workerData } from 'worker_threads'; + +// WorkerData is provided when the worker is constructed. We expect an object +// with an `onnxLock` SharedArrayBuffer so that all workers can coordinate +// access to the ONNX runtime. If absent, locking is skipped. +const sharedBuffer: SharedArrayBuffer | undefined = + workerData && workerData.onnxLock instanceof SharedArrayBuffer + ? (workerData.onnxLock as SharedArrayBuffer) + : undefined; + +const lockView = sharedBuffer ? new Int32Array(sharedBuffer) : null; + +function acquire(): void { + if (!lockView) return; + + while (true) { + const prev = Atomics.compareExchange(lockView, 0, 0, 1); + if (prev === 0) { + return; // we acquired the lock + } + + Atomics.wait(lockView, 0, 1); + } +} + +function release(): void { + if (!lockView) return; + Atomics.store(lockView, 0, 0); + Atomics.notify(lockView, 0, 1); +} + +export async function withOnnxLock(operation: () => Promise): Promise { + acquire(); + try { + return await operation(); + } finally { + release(); + } +} + +export function withOnnxLockSync(operation: () => T): T { + acquire(); + try { + return operation(); + } finally { + release(); + } +} diff --git a/src/competence-matcher/src/utils/worker.ts b/src/competence-matcher/src/utils/worker.ts index 9715884c3..9a9367037 100644 --- a/src/competence-matcher/src/utils/worker.ts +++ b/src/competence-matcher/src/utils/worker.ts @@ -7,7 +7,7 @@ import { config } from '../config'; const {} = config; -export function createWorker(filename: string): Worker { +export function createWorker(filename: string, 
workerData?: any): Worker { const tsPath = path.resolve(__dirname, `../worker/${filename}.ts`); const jsPath = path.resolve(__dirname, `../worker/${filename}.js`); const isTs = fs.existsSync(tsPath); @@ -18,7 +18,9 @@ export function createWorker(filename: string): Worker { ? [...process.execArgv, '-r', 'ts-node/register/transpile-only'] : process.execArgv; - const worker = new Worker(workerFile, { execArgv }); + const options = workerData ? { execArgv, workerData } : { execArgv }; + + const worker = new Worker(workerFile, options); return worker; } diff --git a/src/competence-matcher/src/worker/embedder.ts b/src/competence-matcher/src/worker/embedder.ts index 789a44383..d93ff593e 100644 --- a/src/competence-matcher/src/worker/embedder.ts +++ b/src/competence-matcher/src/worker/embedder.ts @@ -4,6 +4,7 @@ import { withJobUpdates, workerLogger, startHeartbeat, sendHeartbeat } from '../ import { EmbeddingJob, ResourceEmbeddingJob, TaskEmbeddingJob } from '../utils/types'; import { getDB } from '../utils/db'; import { config } from '../config'; +import { withOnnxLock } from '../utils/onnx-lock'; /** * New embedder worker that stays alive and processes jobs sequentially @@ -16,7 +17,7 @@ let modelsInitialised = false; async function ensureModelsInitialised() { if (modelsInitialised) return; try { - await Embedding.getInstance(); + await withOnnxLock(() => Embedding.getInstance()); modelsInitialised = true; workerLogger('system', 'debug', 'Embedder worker online', { threadId }); } catch (err) { @@ -116,7 +117,8 @@ async function processResourceEmbeddingJob(job: ResourceEmbeddingJob): Promise Embedding.embed(text)); + const [vector] = vectors; sendHeartbeat('embedder'); @@ -183,7 +185,8 @@ async function processTaskEmbeddingJob(job: TaskEmbeddingJob): Promise { sendHeartbeat('embedder'); try { - const [vector] = await Embedding.embed(description); + const vectors = await withOnnxLock(() => Embedding.embed(description)); + const [vector] = vectors; 
sendHeartbeat('embedder'); diff --git a/src/competence-matcher/src/worker/matcher.ts b/src/competence-matcher/src/worker/matcher.ts index 014ab2006..10436601d 100644 --- a/src/competence-matcher/src/worker/matcher.ts +++ b/src/competence-matcher/src/worker/matcher.ts @@ -6,6 +6,7 @@ import ZeroShot, { labels } from '../tasks/semantic-zeroshot'; // import CrossEncoder from '../tasks/cross-encode'; import { config } from '../config'; import { getDB } from '../utils/db'; +import { withOnnxLock } from '../utils/onnx-lock'; /** * New matcher worker that stays alive and processes jobs sequentially @@ -18,7 +19,7 @@ let modelsInitialised = false; async function ensureModelsInitialised() { if (modelsInitialised) return; try { - await ZeroShot.getInstance(); + await withOnnxLock(() => ZeroShot.getInstance()); modelsInitialised = true; workerLogger('system', 'debug', 'Matcher worker online', { threadId }); } catch (err) { @@ -124,12 +125,18 @@ parentPort.on('message', async (message: any) => { config.matchDistanceMultiplier, ); - const sentiment = await ZeroShot.nliBiDirectional(description, match.text); + const sentiment = await withOnnxLock(() => + ZeroShot.nliBiDirectional(description, match.text), + ); sendHeartbeat('matcher'); - const contradiction = await ZeroShot.contradictionCheck(description, match.text); - const alignment = await ZeroShot.alignmentCheck(description, match.text); + const contradiction = await withOnnxLock(() => + ZeroShot.contradictionCheck(description, match.text), + ); + const alignment = await withOnnxLock(() => + ZeroShot.alignmentCheck(description, match.text), + ); sendHeartbeat('matcher'); diff --git a/src/competence-matcher/src/worker/worker-manager.ts b/src/competence-matcher/src/worker/worker-manager.ts index 0af56200e..a596cc7fc 100644 --- a/src/competence-matcher/src/worker/worker-manager.ts +++ b/src/competence-matcher/src/worker/worker-manager.ts @@ -30,11 +30,13 @@ class WorkerPool { private readonly workerType: workerTypes; private 
readonly poolSize: number; private readonly maxJobRetries: number; + private readonly workerInitData?: Record; - constructor(workerType: workerTypes, poolSize: number) { + constructor(workerType: workerTypes, poolSize: number, workerInitData?: Record) { this.workerType = workerType; this.poolSize = poolSize; this.maxJobRetries = Math.max(0, jobMaxRetries); + this.workerInitData = workerInitData; logger.info('worker', `Initializing ${workerType} pool with ${poolSize} workers`, { workerType, @@ -74,7 +76,7 @@ class WorkerPool { } private createWorker(): void { - const worker = createWorker(this.workerType); + const worker = createWorker(this.workerType, this.workerInitData); this.workers.push(worker); logger.debug('worker', `Created new ${this.workerType} worker`, { @@ -515,8 +517,14 @@ class WorkerManager { matchingWorkers, }); - this.embeddingPool = new WorkerPool('embedder', embeddingWorkers); - this.matchingPool = new WorkerPool('matcher', matchingWorkers); + const onnxLockBuffer = new SharedArrayBuffer(Int32Array.BYTES_PER_ELEMENT); + const onnxLockView = new Int32Array(onnxLockBuffer); + Atomics.store(onnxLockView, 0, 0); + + const sharedWorkerData = { onnxLock: onnxLockBuffer } as Record; + + this.embeddingPool = new WorkerPool('embedder', embeddingWorkers, sharedWorkerData); + this.matchingPool = new WorkerPool('matcher', matchingWorkers, sharedWorkerData); } public async enqueue(job: EmbeddingJob | MatchingJob, workerType: workerTypes): Promise { From 35cd87ebbde13e1da9de440e56b4789999b2eded Mon Sep 17 00:00:00 2001 From: Maxi <76120220+MaxiLein@users.noreply.github.com> Date: Thu, 20 Nov 2025 10:12:45 +0100 Subject: [PATCH 44/48] ran prettier --- src/competence-matcher/package.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/competence-matcher/package.json b/src/competence-matcher/package.json index 8030bb2c0..78d8001ce 100644 --- a/src/competence-matcher/package.json +++ b/src/competence-matcher/package.json @@ -41,4 +41,4 @@ 
"engines": { "node": ">=23.5.0" } -} \ No newline at end of file +} From b0f7480cc8d1adb94ef11833708789464192e38b Mon Sep 17 00:00:00 2001 From: Maxi <76120220+MaxiLein@users.noreply.github.com> Date: Sun, 23 Nov 2025 15:41:57 +0100 Subject: [PATCH 45/48] single worker inference (try-out) --- src/competence-matcher/src/utils/types.ts | 2 +- .../src/worker/inference.ts | 268 ++++++++++++++++++ .../src/worker/{ => multiworker}/embedder.ts | 117 ++------ .../src/worker/{ => multiworker}/matcher.ts | 140 ++------- .../src/worker/multiworker/worker-manager.ts | 235 +++++++++++++++ .../src/worker/worker-manager.ts | 61 +--- 6 files changed, 567 insertions(+), 256 deletions(-) create mode 100644 src/competence-matcher/src/worker/inference.ts rename src/competence-matcher/src/worker/{ => multiworker}/embedder.ts (68%) rename src/competence-matcher/src/worker/{ => multiworker}/matcher.ts (60%) create mode 100644 src/competence-matcher/src/worker/multiworker/worker-manager.ts diff --git a/src/competence-matcher/src/utils/types.ts b/src/competence-matcher/src/utils/types.ts index 825557b87..65eecb805 100644 --- a/src/competence-matcher/src/utils/types.ts +++ b/src/competence-matcher/src/utils/types.ts @@ -177,7 +177,7 @@ export type GroupedMatchResults = { resourceRanking: ResourceRanking; }; -export type workerTypes = 'embedder' | 'matcher'; +export type workerTypes = 'embedder' | 'matcher' | 'inference'; export interface WorkerQueue { job: any; diff --git a/src/competence-matcher/src/worker/inference.ts b/src/competence-matcher/src/worker/inference.ts new file mode 100644 index 000000000..376c7a2f2 --- /dev/null +++ b/src/competence-matcher/src/worker/inference.ts @@ -0,0 +1,268 @@ +import { parentPort, threadId } from 'worker_threads'; +import Embedding from '../tasks/embedding'; +import ZeroShot from '../tasks/semantic-zeroshot'; +import { withJobUpdates, workerLogger, startHeartbeat, sendHeartbeat } from '../utils/worker'; +import { + EmbeddingJob, + 
ResourceEmbeddingJob, + TaskEmbeddingJob, + MatchingJob, + Match, +} from '../utils/types'; +import { getDB } from '../utils/db'; +import { config } from '../config'; + +if (!parentPort) throw new Error('This file must be run as a Worker thread'); + +let embeddingInitialised = false; +let matchingInitialised = false; + +async function ensureEmbeddingInitialised() { + if (embeddingInitialised) return; + await Embedding.getInstance(); + embeddingInitialised = true; + workerLogger('system', 'info', 'Inference worker embedding pipeline ready', { threadId }); +} +async function ensureMatchingInitialised() { + if (matchingInitialised) return; + await ZeroShot.getInstance(); + matchingInitialised = true; + workerLogger('system', 'info', 'Inference worker matching pipeline ready', { threadId }); +} + +startHeartbeat('inference', config.workerHeartbeatInterval); + +parentPort.on('message', async (raw: any) => { + const possible = raw as EmbeddingJob & MatchingJob; + const isEmbeddingJob = + (possible as EmbeddingJob).tasks && (possible as EmbeddingJob).mode !== undefined; + const jobId = possible.jobId; + workerLogger(jobId || 'system', 'debug', 'Inference worker received job', { + threadId, + jobId, + kind: isEmbeddingJob ? 'embedding' : 'matching', + taskCount: possible.tasks?.length || 0, + }); + try { + if (isEmbeddingJob) await ensureEmbeddingInitialised(); + else await ensureMatchingInitialised(); + } catch (err) { + workerLogger( + jobId || 'system', + 'error', + 'Inference worker failed model init', + { threadId, kind: isEmbeddingJob ? 'embedding' : 'matching' }, + err instanceof Error ? err : new Error(String(err)), + ); + parentPort!.postMessage({ + type: 'error', + jobId, + error: `Model initialisation failed: ${err instanceof Error ? 
err.message : String(err)}`, + }); + parentPort!.postMessage({ type: 'job_completed', jobId }); + return; + } + if (isEmbeddingJob) { + await handleEmbeddingJob(raw as EmbeddingJob); + } else { + await handleMatchingJob(raw as MatchingJob); + } +}); + +async function handleEmbeddingJob(job: EmbeddingJob) { + try { + if ((job as TaskEmbeddingJob).mode === 'task') + await processTaskEmbeddingJob(job as TaskEmbeddingJob); + else await processResourceEmbeddingJob(job as ResourceEmbeddingJob); + workerLogger(job.jobId, 'info', 'Embedding job completed', { threadId }); + } catch (error) { + workerLogger( + job.jobId, + 'error', + 'Embedding job failed', + { threadId }, + error instanceof Error ? error : new Error(String(error)), + ); + } finally { + parentPort!.postMessage({ type: 'job_completed', jobId: job.jobId }); + } +} + +async function processResourceEmbeddingJob(job: ResourceEmbeddingJob): Promise { + workerLogger(job.jobId, 'debug', `Resource embedding job start (${job.tasks.length} tasks)`, { + threadId, + }); + await withJobUpdates(job, async (db, { tasks, jobId }) => { + for (const { listId, resourceId, competenceId, text, type } of tasks) { + sendHeartbeat('inference'); + try { + const vectors = await Embedding.embed(text); + const [vector] = vectors; + sendHeartbeat('inference'); + db.upsertEmbedding({ listId, resourceId, competenceId, text, type, embedding: vector }); + } catch (err) { + workerLogger( + jobId, + 'error', + 'Failed embedding task', + { threadId, competenceId, type }, + err instanceof Error ? err : new Error(String(err)), + ); + parentPort!.postMessage({ + type: 'error', + jobId, + error: err instanceof Error ? 
err.message : String(err), + }); + } + } + }); +} + +async function processTaskEmbeddingJob(job: TaskEmbeddingJob): Promise { + workerLogger(job.jobId, 'debug', `Task embedding job start (${job.tasks.length} tasks)`, { + threadId, + }); + const dbInstance = getDB(job.dbName); + await withJobUpdates( + job, + async (db, { tasks, jobId }) => { + for (const task of tasks) { + const { taskId, description } = task; + if (!description) continue; + sendHeartbeat('inference'); + try { + const vectors = await Embedding.embed(description); + const [vector] = vectors; + db.upsertTaskEmbedding(jobId, taskId, vector); + sendHeartbeat('inference'); + } catch (err) { + workerLogger( + jobId, + 'error', + 'Failed task embedding', + { threadId, taskId }, + err instanceof Error ? err : new Error(String(err)), + ); + parentPort!.postMessage({ + type: 'error', + jobId, + error: err instanceof Error ? err.message : String(err), + }); + } + } + }, + { + onStart: () => { + dbInstance.updateJobStatus(job.jobId, 'preprocessing'); + parentPort!.postMessage({ type: 'status', jobId: job.jobId, status: 'preprocessing' }); + }, + onDone: () => { + dbInstance.updateJobStatus(job.jobId, 'pending'); + parentPort!.postMessage({ type: 'status', jobId: job.jobId, status: 'pending' }); + }, + }, + ); +} + +async function handleMatchingJob(job: MatchingJob) { + workerLogger(job.jobId, 'debug', `Matching job start (${job.tasks.length} tasks)`, { threadId }); + try { + const matchResults: Record = {}; + for (const t of job.tasks) if (t.description) matchResults[t.description] = []; + await withJobUpdates( + job, + async (db, { jobId, tasks, listId: listIdFilter, resourceId: resourceIdFilter }) => { + for (const task of tasks) { + const { taskId, description } = task; + if (!description) continue; + sendHeartbeat('inference'); + try { + const vector = job.taskEmbeddings?.[taskId] ?? 
db.getTaskEmbedding(jobId, taskId); + if (!vector) throw new Error(`No embedding stored for task ${taskId}`); + if (vector.length !== config.embeddingDim) + throw new Error(`Embedding length mismatch for task ${taskId}`); + sendHeartbeat('inference'); + const matches: Match[] = db.searchEmbedding(vector, { + filter: { listId: listIdFilter, resourceId: resourceIdFilter }, + }); + for (const match of matches) { + let flag = 'neutral'; + let newDistance = Math.min( + 1, + Math.max(0, match.distance - config.matchDistanceOffset) * + config.matchDistanceMultiplier, + ); + const sentiment = await ZeroShot.nliBiDirectional(description, match.text); + sendHeartbeat('inference'); + const contradiction = await ZeroShot.contradictionCheck(description, match.text); + const alignment = await ZeroShot.alignmentCheck(description, match.text); + sendHeartbeat('inference'); + if ( + sentiment.ranking[0] === 'contradict' || + sentiment.contradict > config.contradictionThreshold || + contradiction.contradicting + ) { + flag = 'contradicting'; + newDistance = 0.0; + } else if ( + sentiment.entail > config.entailmentThreshold && + match.distance > (config as any).alignmentDistanceThreshold && + alignment.aligning + ) { + newDistance = Math.min(1, newDistance * (config as any).alignmentBoostMultiplier); + flag = 'aligning'; + } else { + newDistance *= (config as any).neutralReductionMultiplier || 1; + } + matchResults[description].push({ + jobId, + taskId, + taskText: description, + competenceId: match.competenceId, + resourceId: match.resourceId, + text: match.text, + type: match.type as 'name' | 'description' | 'proficiencyLevel', + alignment: flag, + distance: newDistance, + reason: match.reason, + }); + } + } catch (err) { + workerLogger( + jobId, + 'error', + `Failed to process task ${taskId}`, + { threadId, taskId }, + err instanceof Error ? err : new Error(String(err)), + ); + parentPort!.postMessage({ + type: 'error', + jobId, + error: err instanceof Error ? 
err.message : String(err), + }); + } + } + }, + { + onDone: () => + parentPort!.postMessage({ type: 'job', job: 'reason', workload: matchResults }), + }, + ); + workerLogger(job.jobId, 'info', 'Matching job completed', { threadId }); + } catch (err) { + workerLogger( + job.jobId, + 'error', + 'Matching job failed', + { threadId }, + err instanceof Error ? err : new Error(String(err)), + ); + } finally { + try { + getDB(job.dbName).deleteTaskEmbeddings(job.jobId); + } catch {} + parentPort!.postMessage({ type: 'job_completed', jobId: job.jobId }); + } +} + +workerLogger('system', 'debug', 'Inference worker ready', { threadId }); diff --git a/src/competence-matcher/src/worker/embedder.ts b/src/competence-matcher/src/worker/multiworker/embedder.ts similarity index 68% rename from src/competence-matcher/src/worker/embedder.ts rename to src/competence-matcher/src/worker/multiworker/embedder.ts index d93ff593e..cf9e11e2e 100644 --- a/src/competence-matcher/src/worker/embedder.ts +++ b/src/competence-matcher/src/worker/multiworker/embedder.ts @@ -1,38 +1,26 @@ +// Backup original embedder worker (multi-worker mode) import { parentPort, threadId } from 'worker_threads'; -import Embedding from '../tasks/embedding'; -import { withJobUpdates, workerLogger, startHeartbeat, sendHeartbeat } from '../utils/worker'; -import { EmbeddingJob, ResourceEmbeddingJob, TaskEmbeddingJob } from '../utils/types'; -import { getDB } from '../utils/db'; -import { config } from '../config'; -import { withOnnxLock } from '../utils/onnx-lock'; +import Embedding from '../../tasks/embedding'; +import { withJobUpdates, workerLogger, startHeartbeat, sendHeartbeat } from '../../utils/worker'; +import { EmbeddingJob, ResourceEmbeddingJob, TaskEmbeddingJob } from '../../utils/types'; +import { getDB } from '../../utils/db'; +import { config } from '../../config'; +import { withOnnxLock } from '../../utils/onnx-lock'; -/** - * New embedder worker that stays alive and processes jobs sequentially - */ -if 
(!parentPort) { - throw new Error('This file must be run as a Worker thread'); -} +if (!parentPort) throw new Error('This file must be run as a Worker thread'); let modelsInitialised = false; async function ensureModelsInitialised() { if (modelsInitialised) return; - try { - await withOnnxLock(() => Embedding.getInstance()); - modelsInitialised = true; - workerLogger('system', 'debug', 'Embedder worker online', { threadId }); - } catch (err) { - throw err; - } + await withOnnxLock(() => Embedding.getInstance()); + modelsInitialised = true; + workerLogger('system', 'debug', 'Embedder worker online', { threadId }); } -// Start heartbeat immediately startHeartbeat('embedder', config.workerHeartbeatInterval); -// Set up job message handler parentPort.on('message', async (message: any) => { - // Handle job messages const job = message as EmbeddingJob; - workerLogger(job.jobId || 'system', 'debug', 'Embedder worker received job', { threadId, jobId: job.jobId, @@ -40,7 +28,6 @@ parentPort.on('message', async (message: any) => { mode: job.mode ?? 'resource', }); - // ensure models are initialised (but do not run this for health_check) try { await ensureModelsInitialised(); } catch (err) { @@ -51,13 +38,11 @@ parentPort.on('message', async (message: any) => { { threadId, jobId: job.jobId }, err instanceof Error ? err : new Error(String(err)), ); - // Notify parent and exit or mark job failed parentPort!.postMessage({ type: 'error', jobId: job.jobId, error: `Model initialisation failed: ${err instanceof Error ? 
err.message : String(err)}`, }); - // still send job_completed so worker pool can continue parentPort!.postMessage({ type: 'job_completed', jobId: job.jobId }); return; } @@ -68,8 +53,7 @@ parentPort.on('message', async (message: any) => { } else { await processResourceEmbeddingJob(job as ResourceEmbeddingJob); } - - workerLogger(job.jobId, 'debug', `Embedding job completed`, { + workerLogger(job.jobId, 'debug', 'Embedding job completed', { threadId, taskCount: job.tasks.length, mode: job.mode ?? 'resource', @@ -78,79 +62,46 @@ parentPort.on('message', async (message: any) => { workerLogger( job.jobId, 'debug', - `Embedding job failed`, - { - threadId, - mode: job.mode ?? 'resource', - }, + 'Embedding job failed', + { threadId, mode: job.mode ?? 'resource' }, error instanceof Error ? error : new Error(String(error)), ); } finally { - parentPort!.postMessage({ - type: 'job_completed', - jobId: job.jobId, - }); + parentPort!.postMessage({ type: 'job_completed', jobId: job.jobId }); } }); -workerLogger('system', 'debug', `Embedder worker thread ready`, { - threadId, -}); +workerLogger('system', 'debug', 'Embedder worker thread ready', { threadId }); async function processResourceEmbeddingJob(job: ResourceEmbeddingJob): Promise { workerLogger( job.jobId, 'debug', `Starting resource embedding job with ${job.tasks.length} tasks`, - { - threadId, - taskCount: job.tasks.length, - }, + { threadId, taskCount: job.tasks.length }, ); - await withJobUpdates(job, async (db, { tasks, jobId }) => { - let work = tasks; - - // TODO: Re-enable semantic splitting once the worker crash issue is resolved - // work = await splitSemantically(tasks); - - for (const { listId, resourceId, competenceId, text, type } of work) { + for (const { listId, resourceId, competenceId, text, type } of tasks) { sendHeartbeat('embedder'); try { const vectors = await withOnnxLock(() => Embedding.embed(text)); const [vector] = vectors; - sendHeartbeat('embedder'); - workerLogger(jobId, 'debug', `Generated 
embedding for ${type} text`, { threadId, competenceId, textLength: text.length, }); - - db.upsertEmbedding({ - listId, - resourceId, - competenceId, - text, - type, - embedding: vector, - }); - + db.upsertEmbedding({ listId, resourceId, competenceId, text, type, embedding: vector }); sendHeartbeat('embedder'); } catch (error) { workerLogger( jobId, 'error', - `Failed to process embedding task`, - { - threadId, - competenceId, - type, - }, + 'Failed to process embedding task', + { threadId, competenceId, type }, error instanceof Error ? error : new Error(String(error)), ); - parentPort!.postMessage({ type: 'error', jobId, @@ -166,9 +117,7 @@ async function processTaskEmbeddingJob(job: TaskEmbeddingJob): Promise { threadId, taskCount: job.tasks.length, }); - const dbInstance = getDB(job.dbName); - await withJobUpdates( job, async (db, { tasks, jobId }) => { @@ -181,36 +130,26 @@ async function processTaskEmbeddingJob(job: TaskEmbeddingJob): Promise { }); continue; } - sendHeartbeat('embedder'); - try { const vectors = await withOnnxLock(() => Embedding.embed(description)); const [vector] = vectors; - sendHeartbeat('embedder'); - workerLogger(jobId, 'debug', 'Generated task embedding', { threadId, taskId, textLength: description.length, }); - db.upsertTaskEmbedding(jobId, taskId, vector); - sendHeartbeat('embedder'); } catch (error) { workerLogger( jobId, 'error', 'Failed to generate task embedding', - { - threadId, - taskId, - }, + { threadId, taskId }, error instanceof Error ? 
error : new Error(String(error)), ); - parentPort!.postMessage({ type: 'error', jobId, @@ -222,19 +161,11 @@ async function processTaskEmbeddingJob(job: TaskEmbeddingJob): Promise { { onStart: () => { dbInstance.updateJobStatus(job.jobId, 'preprocessing'); - parentPort!.postMessage({ - type: 'status', - jobId: job.jobId, - status: 'preprocessing', - }); + parentPort!.postMessage({ type: 'status', jobId: job.jobId, status: 'preprocessing' }); }, onDone: () => { dbInstance.updateJobStatus(job.jobId, 'pending'); - parentPort!.postMessage({ - type: 'status', - jobId: job.jobId, - status: 'pending', - }); + parentPort!.postMessage({ type: 'status', jobId: job.jobId, status: 'pending' }); }, }, ); diff --git a/src/competence-matcher/src/worker/matcher.ts b/src/competence-matcher/src/worker/multiworker/matcher.ts similarity index 60% rename from src/competence-matcher/src/worker/matcher.ts rename to src/competence-matcher/src/worker/multiworker/matcher.ts index 10436601d..e67b0f0e0 100644 --- a/src/competence-matcher/src/worker/matcher.ts +++ b/src/competence-matcher/src/worker/multiworker/matcher.ts @@ -1,48 +1,33 @@ +// Backup original matcher worker (multi-worker mode) import { parentPort, threadId } from 'worker_threads'; -import { withJobUpdates, workerLogger, startHeartbeat, sendHeartbeat } from '../utils/worker'; -import { addReason } from '../tasks/reason'; -import { Match, MatchingJob } from '../utils/types'; -import ZeroShot, { labels } from '../tasks/semantic-zeroshot'; -// import CrossEncoder from '../tasks/cross-encode'; -import { config } from '../config'; -import { getDB } from '../utils/db'; -import { withOnnxLock } from '../utils/onnx-lock'; +import { withJobUpdates, workerLogger, startHeartbeat, sendHeartbeat } from '../../utils/worker'; +import { addReason } from '../../tasks/reason'; +import { Match, MatchingJob } from '../../utils/types'; +import ZeroShot from '../../tasks/semantic-zeroshot'; +import { config } from '../../config'; +import { getDB } 
from '../../utils/db'; +import { withOnnxLock } from '../../utils/onnx-lock'; -/** - * New matcher worker that stays alive and processes jobs sequentially - */ -if (!parentPort) { - throw new Error('This file must be run as a Worker thread'); -} +if (!parentPort) throw new Error('This file must be run as a Worker thread'); let modelsInitialised = false; async function ensureModelsInitialised() { if (modelsInitialised) return; - try { - await withOnnxLock(() => ZeroShot.getInstance()); - modelsInitialised = true; - workerLogger('system', 'debug', 'Matcher worker online', { threadId }); - } catch (err) { - // Bubble up so job handling can report the error - throw err; - } + await withOnnxLock(() => ZeroShot.getInstance()); + modelsInitialised = true; + workerLogger('system', 'debug', 'Matcher worker online', { threadId }); } -// Start heartbeat immediately startHeartbeat('matcher', config.workerHeartbeatInterval); -// Set up job message handler parentPort.on('message', async (message: any) => { - // Handle job messages const job = message as MatchingJob; - workerLogger(job.jobId || 'system', 'debug', 'Matcher worker received job', { threadId, jobId: job.jobId, taskCount: job.tasks?.length || 0, listId: job.listId, }); - try { await ensureModelsInitialised(); } catch (err) { @@ -53,95 +38,59 @@ parentPort.on('message', async (message: any) => { { threadId, jobId: job.jobId }, err instanceof Error ? err : new Error(String(err)), ); - // Notify parent and exit or mark job failed parentPort!.postMessage({ type: 'error', jobId: job.jobId, error: `Model initialisation failed: ${err instanceof Error ? 
err.message : String(err)}`, }); - // still send job_completed so worker pool can continue parentPort!.postMessage({ type: 'job_completed', jobId: job.jobId }); return; } - workerLogger(job.jobId, 'debug', `Starting matching job with ${job.tasks.length} tasks`, { threadId, taskCount: job.tasks.length, }); - try { - // Store match results for reasoning workaround const matchResults: { [description: string]: any[] } = {}; - for (const task of job.tasks) { - const { description } = task; - if (!description) { - continue; // Skip tasks without description - } - matchResults[description] = []; - } - + for (const task of job.tasks) if (task.description) matchResults[task.description] = []; await withJobUpdates( job, async (db, { jobId, tasks, listId: listIdFilter, resourceId: resourceIdFilter }) => { for (const task of tasks) { - const { taskId, name, description, executionInstructions, requiredCompetencies } = task; - - if (!description) { - continue; // Skip tasks without description - } - + const { taskId, description } = task; + if (!description) continue; sendHeartbeat('matcher'); - try { - // Retrieve stored embedding for the task const vector = job.taskEmbeddings?.[taskId] ?? 
db.getTaskEmbedding(jobId, taskId); - - if (!vector) { - throw new Error(`No embedding stored for task ${taskId}`); - } - - if (vector.length !== config.embeddingDim) { + if (!vector) throw new Error(`No embedding stored for task ${taskId}`); + if (vector.length !== config.embeddingDim) throw new Error( `Embedding length mismatch for task ${taskId}: expected ${config.embeddingDim}, received ${vector.length}`, ); - } - sendHeartbeat('matcher'); - - // Search for matches in the competence database const matches: Match[] = db.searchEmbedding(vector, { - filter: { - listId: listIdFilter, - resourceId: resourceIdFilter, // Optional: If matching against a single resource - }, + filter: { listId: listIdFilter, resourceId: resourceIdFilter }, }); for (const match of matches) { - let flag = 'neutral'; // Default flag - - // Balance distance + let flag = 'neutral'; let newDistance = Math.min( 1, Math.max(0, match.distance - config.matchDistanceOffset) * config.matchDistanceMultiplier, ); - const sentiment = await withOnnxLock(() => ZeroShot.nliBiDirectional(description, match.text), ); - sendHeartbeat('matcher'); - const contradiction = await withOnnxLock(() => ZeroShot.contradictionCheck(description, match.text), ); const alignment = await withOnnxLock(() => ZeroShot.alignmentCheck(description, match.text), ); - sendHeartbeat('matcher'); - if ( - sentiment.ranking[0] == 'contradict' || + sentiment.ranking[0] === 'contradict' || sentiment.contradict > config.contradictionThreshold || contradiction.contradicting ) { @@ -158,7 +107,6 @@ parentPort.on('message', async (message: any) => { flag = 'neutral'; newDistance *= config.neutralReductionMultiplier; } - matchResults[description].push({ jobId, taskId, @@ -173,19 +121,13 @@ parentPort.on('message', async (message: any) => { }); } } catch (error) { - // Log error for task processing but continue with other tasks workerLogger( jobId, 'error', `Failed to process task ${taskId}`, - { - threadId, - taskId, - }, + { threadId, 
taskId }, error instanceof Error ? error : new Error(String(error)), ); - - // Individual task errors don't fail the entire job parentPort!.postMessage({ type: 'error', jobId, @@ -195,58 +137,36 @@ parentPort.on('message', async (message: any) => { } }, { - // When job processing is done, send results for reasoning - onDone: () => { - parentPort!.postMessage({ - type: 'job', - job: 'reason', - workload: matchResults, - }); - }, + onDone: () => + parentPort!.postMessage({ type: 'job', job: 'reason', workload: matchResults }), }, ); - - // Job completed successfully - workerLogger(job.jobId, 'debug', `Matching job completed`, { + workerLogger(job.jobId, 'debug', 'Matching job completed', { threadId, taskCount: job.tasks.length, }); } catch (error) { - // Job-level error - already handled by withJobUpdates - // Just log it for worker context workerLogger( job.jobId, 'debug', - `Matching job failed`, - { - threadId, - }, + 'Matching job failed', + { threadId }, error instanceof Error ? error : new Error(String(error)), ); } finally { try { - const cleanupDb = getDB(job.dbName); - cleanupDb.deleteTaskEmbeddings(job.jobId); + getDB(job.dbName).deleteTaskEmbeddings(job.jobId); } catch (error) { workerLogger( job.jobId, 'warn', 'Failed to clean up task embeddings after matching job', - { - threadId, - }, + { threadId }, error instanceof Error ? 
error : new Error(String(error)), ); } - - // Always notify job completion so worker can process next job - parentPort!.postMessage({ - type: 'job_completed', - jobId: job.jobId, - }); + parentPort!.postMessage({ type: 'job_completed', jobId: job.jobId }); } }); -workerLogger('system', 'debug', `Matcher worker thread ready`, { - threadId, -}); +workerLogger('system', 'debug', 'Matcher worker thread ready', { threadId }); diff --git a/src/competence-matcher/src/worker/multiworker/worker-manager.ts b/src/competence-matcher/src/worker/multiworker/worker-manager.ts new file mode 100644 index 000000000..6a1d87b87 --- /dev/null +++ b/src/competence-matcher/src/worker/multiworker/worker-manager.ts @@ -0,0 +1,235 @@ +// Backup original multi-worker manager +import { Worker } from 'worker_threads'; +import { config } from '../../config'; +import { createWorker } from '../../utils/worker'; +import { EmbeddingJob, JobQueueItem, MatchingJob, workerTypes } from '../../utils/types'; +import { WorkerError } from '../../utils/errors'; +import { getLogger } from '../../utils/logger'; +import { addReason } from '../../tasks/reason'; +import { getDB } from '../../utils/db'; + +const { embeddingWorkers, matchingWorkers, workerDeathTimeout, jobMaxRetries } = config; +const logger = getLogger(); + +class WorkerPool { + private workers: Worker[] = []; + private availableWorkers: Set = new Set(); + private busyWorkers: Map = new Map(); + private activeJobs: Map = new Map(); + private jobQueue: JobQueueItem[] = []; + private workerDeathTimers: Map = new Map(); + private workersBeingReplaced: Set = new Set(); + private readonly workerType: workerTypes; + private readonly poolSize: number; + private readonly maxJobRetries: number; + private readonly workerInitData?: Record; + constructor(workerType: workerTypes, poolSize: number, workerInitData?: Record) { + this.workerType = workerType; + this.poolSize = poolSize; + this.maxJobRetries = Math.max(0, jobMaxRetries); + this.workerInitData = 
workerInitData; + this.initialiseWorkers(); + } + private initialiseWorkers(): void { + for (let i = 0; i < this.poolSize; i++) this.createWorker(); + } + private createWorker(): void { + const worker = createWorker(`multiworker/${this.workerType}`, this.workerInitData); + this.workers.push(worker); + const deathTimer = setTimeout(() => this.killWorker(worker), workerDeathTimeout); + this.workerDeathTimers.set(worker, deathTimer); + worker.on('message', (m: any) => { + if (m.type === 'heartbeat') this.handleHeartbeat(worker); + else if (m.type === 'log') this.handleWorkerLog(m); + }); + worker.on('error', (e) => this.handleWorkerFailure(worker, e)); + worker.once('exit', () => this.handleWorkerFailure(worker, new Error('Worker exited'))); + } + private handleHeartbeat(worker: Worker): void { + const existing = this.workerDeathTimers.get(worker); + if (existing) clearTimeout(existing); + const timer = setTimeout(() => this.killWorker(worker), workerDeathTimeout); + this.workerDeathTimers.set(worker, timer); + if (!this.busyWorkers.has(worker)) { + this.availableWorkers.add(worker); + this.processNextJob(); + } + } + private handleWorkerLog(message: any): void { + const { level, logType, message: logMessage, data, error } = message; + if (error) { + const reconstructed = new Error(error.message); + reconstructed.stack = error.stack; + reconstructed.name = error.name; + getLogger()[level as 'debug' | 'info' | 'warn' | 'error']( + logType, + logMessage, + reconstructed, + data, + ); + } else { + getLogger()[level as 'debug' | 'info' | 'warn' | 'error'](logType, logMessage, data); + } + } + private killWorker(worker: Worker): void { + if (this.workersBeingReplaced.has(worker.threadId)) return; + this.workersBeingReplaced.add(worker.threadId); + const activeJob = this.activeJobs.get(worker); + if (activeJob) { + this.activeJobs.delete(worker); + this.busyWorkers.delete(worker); + this.handleJobFailure(activeJob, new Error('Worker heartbeat timeout')); + } + 
this.removeWorker(worker); + this.createWorker(); + this.workersBeingReplaced.delete(worker.threadId); + this.processNextJob(); + } + private handleWorkerFailure(worker: Worker, error: Error): void { + if (this.workersBeingReplaced.has(worker.threadId)) return; + this.workersBeingReplaced.add(worker.threadId); + const activeJob = this.activeJobs.get(worker); + if (activeJob) { + this.activeJobs.delete(worker); + this.busyWorkers.delete(worker); + this.handleJobFailure(activeJob, error); + } + this.removeWorker(worker); + this.createWorker(); + this.workersBeingReplaced.delete(worker.threadId); + if (activeJob) this.processNextJob(); + } + private removeWorker(worker: Worker): void { + this.availableWorkers.delete(worker); + this.busyWorkers.delete(worker); + this.activeJobs.delete(worker); + const timer = this.workerDeathTimers.get(worker); + if (timer) { + clearTimeout(timer); + this.workerDeathTimers.delete(worker); + } + const index = this.workers.indexOf(worker); + if (index > -1) this.workers.splice(index, 1); + worker.removeAllListeners(); + worker.terminate(); + this.workersBeingReplaced.delete(worker.threadId); + } + public async executeJob(job: EmbeddingJob | MatchingJob): Promise { + return new Promise((resolve, reject) => { + this.jobQueue.push({ job, resolve, reject, retryCount: 0 }); + this.processNextJob(); + }); + } + private processing = false; + private processNextJob(): void { + if (this.processing) return; + if (this.jobQueue.length === 0 || this.availableWorkers.size === 0) return; + this.processing = true; + const worker = this.availableWorkers.values().next().value as Worker; + const item = this.jobQueue.shift()!; + this.assignJobToWorker(worker, item); + this.processing = false; + } + private assignJobToWorker(worker: Worker, queueItem: JobQueueItem): void { + const { job, resolve, reject, retryCount } = queueItem; + this.availableWorkers.delete(worker); + this.busyWorkers.set(worker, job.jobId); + this.activeJobs.set(worker, queueItem); + 
const handler = (message: any) => { + if (message.type === 'job_completed' && message.jobId === job.jobId) { + worker.removeListener('message', handler); + this.activeJobs.delete(worker); + this.markWorkerAvailable(worker); + resolve(message.result || 'Job completed'); + this.processNextJob(); + } else if (message.type === 'error' && message.jobId === job.jobId) { + worker.removeListener('message', handler); + this.activeJobs.delete(worker); + this.markWorkerAvailable(worker); + this.handleJobFailure(queueItem, new Error(message.error)); + this.processNextJob(); + } else if (message.type === 'job' && message.job === 'reason') { + this.handleReasoning(job, message); + } + }; + worker.on('message', handler); + worker.postMessage(job); + } + private markWorkerAvailable(worker: Worker): void { + this.busyWorkers.delete(worker); + this.availableWorkers.add(worker); + } + private handleJobFailure(queueItem: JobQueueItem, error: Error): void { + const { job, resolve, reject, retryCount } = queueItem; + if (retryCount < this.maxJobRetries) { + this.jobQueue.unshift({ job, resolve, reject, retryCount: retryCount + 1 }); + } else { + reject(new WorkerError(this.workerType, job.jobId, error)); + } + } + private async handleReasoning(job: any, message: any): Promise { + const finalMatches = []; + for (const [task, matches] of Object.entries(message.workload)) { + try { + const taskMatches = await addReason(matches as any[], task); + finalMatches.push(...taskMatches); + } catch { + finalMatches.push(...(matches as any[])); + } + } + const db = getDB(job.dbName); + for (const match of finalMatches) { + try { + db.addMatchResult({ + jobId: match.jobId, + taskId: match.taskId, + taskText: match.taskText, + competenceId: match.competenceId, + resourceId: match.resourceId, + distance: match.distance, + text: match.text, + type: match.type, + alignment: match.alignment, + reason: match.reason, + }); + } catch {} + } + try { + db.updateJobStatus(job.jobId, 'completed'); + } catch {} + 
} + public async shutdown(): Promise { + this.jobQueue.forEach((i) => i.reject(new Error('Shutting down'))); + this.activeJobs.forEach((i) => i.reject(new Error('Shutting down'))); + this.workerDeathTimers.forEach((t) => clearTimeout(t)); + this.workers.forEach((w) => w.terminate()); + this.jobQueue.length = 0; + this.activeJobs.clear(); + this.workerDeathTimers.clear(); + this.workers.length = 0; + this.availableWorkers.clear(); + this.busyWorkers.clear(); + } +} +class WorkerManager { + private embeddingPool: WorkerPool; + private matchingPool: WorkerPool; + constructor() { + const onnxLockBuffer = new SharedArrayBuffer(Int32Array.BYTES_PER_ELEMENT); + const view = new Int32Array(onnxLockBuffer); + Atomics.store(view, 0, 0); + const sharedWorkerData = { onnxLock: onnxLockBuffer } as Record; + this.embeddingPool = new WorkerPool('embedder', embeddingWorkers, sharedWorkerData); + this.matchingPool = new WorkerPool('matcher', matchingWorkers, sharedWorkerData); + } + public async enqueue(job: EmbeddingJob | MatchingJob, workerType: workerTypes): Promise { + if (workerType === 'embedder') return this.embeddingPool.executeJob(job); + if (workerType === 'matcher') return this.matchingPool.executeJob(job); + throw new Error(`Unknown worker type: ${workerType}`); + } + public async shutdown(): Promise { + await Promise.all([this.embeddingPool.shutdown(), this.matchingPool.shutdown()]); + } +} +const manager = new WorkerManager(); +export default manager; diff --git a/src/competence-matcher/src/worker/worker-manager.ts b/src/competence-matcher/src/worker/worker-manager.ts index a596cc7fc..9ce76090b 100644 --- a/src/competence-matcher/src/worker/worker-manager.ts +++ b/src/competence-matcher/src/worker/worker-manager.ts @@ -54,29 +54,12 @@ class WorkerPool { } private initialiseWorkers(): void { - logger.debug('worker', `Initializing ${this.poolSize} ${this.workerType} workers`, { - workerType: this.workerType, - poolSize: this.poolSize, - }); - - for (let i = 0; i < 
this.poolSize; i++) { - logger.debug('worker', `Creating ${this.workerType} worker ${i + 1}/${this.poolSize}`, { - workerType: this.workerType, - workerIndex: i + 1, - totalToCreate: this.poolSize, - }); - this.createWorker(); - } - - logger.debug('worker', `Finished initializing ${this.workerType} workers`, { - workerType: this.workerType, - workersCreated: this.workers.length, - expectedWorkers: this.poolSize, - }); + for (let i = 0; i < this.poolSize; i++) this.createWorker(); } private createWorker(): void { - const worker = createWorker(this.workerType, this.workerInitData); + const script = this.workerType === 'inference' ? 'inference' : this.workerType; + const worker = createWorker(script, this.workerInitData); this.workers.push(worker); logger.debug('worker', `Created new ${this.workerType} worker`, { @@ -508,50 +491,24 @@ class WorkerPool { * WorkerManager - High-level interface for managing worker pools */ class WorkerManager { - private embeddingPool: WorkerPool; - private matchingPool: WorkerPool; - + private inferencePool: WorkerPool; constructor() { - logger.info('worker', 'Initializing WorkerManager', { + logger.info('worker', 'Initialising Monolithic WorkerManager (inference)', { embeddingWorkers, matchingWorkers, }); - - const onnxLockBuffer = new SharedArrayBuffer(Int32Array.BYTES_PER_ELEMENT); - const onnxLockView = new Int32Array(onnxLockBuffer); - Atomics.store(onnxLockView, 0, 0); - - const sharedWorkerData = { onnxLock: onnxLockBuffer } as Record; - - this.embeddingPool = new WorkerPool('embedder', embeddingWorkers, sharedWorkerData); - this.matchingPool = new WorkerPool('matcher', matchingWorkers, sharedWorkerData); + this.inferencePool = new WorkerPool('inference', 1); } - public async enqueue(job: EmbeddingJob | MatchingJob, workerType: workerTypes): Promise { - logger.info('worker', `Enqueuing job to ${workerType} pool`, { + logger.info('worker', `Enqueuing ${workerType} job (routed to inference)`, { workerType, jobId: job.jobId, }); - - 
if (workerType === 'embedder') { - return this.embeddingPool.executeJob(job); - } else if (workerType === 'matcher') { - return this.matchingPool.executeJob(job); - } else { - const error = new Error(`Unknown worker type: ${workerType}`); - logger.error('worker', 'Unknown worker type requested', error, { - workerType, - jobId: job.jobId, - }); - throw error; - } + return this.inferencePool.executeJob(job); } - public async shutdown(): Promise { - await Promise.all([this.embeddingPool.shutdown(), this.matchingPool.shutdown()]); + await this.inferencePool.shutdown(); } } - -// Export singleton instance const manager = new WorkerManager(); export default manager; From 817e9640c649d8ca3ea270cf6e66ba1563a0354d Mon Sep 17 00:00:00 2001 From: Maxi <76120220+MaxiLein@users.noreply.github.com> Date: Mon, 24 Nov 2025 09:28:21 +0100 Subject: [PATCH 46/48] refactor mono worker --- src/competence-matcher/src/config.ts | 19 +- .../src/worker/worker-manager.ts | 679 +++++++----------- 2 files changed, 252 insertions(+), 446 deletions(-) diff --git a/src/competence-matcher/src/config.ts b/src/competence-matcher/src/config.ts index a5065a02f..fe5117d2a 100644 --- a/src/competence-matcher/src/config.ts +++ b/src/competence-matcher/src/config.ts @@ -16,29 +16,16 @@ export const config = { ollamaBearerToken: process.env.OLLAMA_BEARER_TOKEN || '', ollamaBatchSize: parseInt(process.env.OLLAMA_BATCH_SIZE || '20', 10), splittingModel: process.env.SPLITTING_MODEL || 'llama3.2', - splittingLength: parseInt(process.env.SPLITTING_LENGTH || '1000', 10), // Set this to 0 to disable splitting + splittingLength: parseInt(process.env.SPLITTING_LENGTH || '1000', 0), // Set this to 0 to disable splitting reasonModel: process.env.REASON_MODEL || 'llama3.2', splittingSymbol: process.env.SPLITTING_SYMBOL || '', - embeddingWorkers: parseInt(process.env.EMBEDDING_WORKERS || '1', 10), // Number of embedding workers to keep alive - matchingWorkers: parseInt(process.env.MATCHING_WORKERS || '1', 10), // 
Number of matching workers to keep alive - workerHeartbeatInterval: parseInt(process.env.WORKER_HEARTBEAT_INTERVAL || '60', 10) * 1_000, // Worker heartbeat interval in seconds (converted to ms) - how often workers send heartbeats - workerDeathTimeout: parseInt(process.env.WORKER_DEATH_TIMEOUT || '240', 10) * 1_000, // Worker death timeout in seconds (converted to ms) - how long to wait before considering worker dead - maxJobTime: parseInt(process.env.MAX_JOB_TIME || '600', 10) * 1_000, // converted from seconds to milliseconds + workerHeartbeatInterval: parseInt(process.env.WORKER_HEARTBEAT_INTERVAL || '120', 10) * 1_000, + workerDeathTimeout: parseInt(process.env.WORKER_DEATH_TIMEOUT || '360', 10) * 1_000, logLevel: process.env.LOG_LEVEL || 'INFO', // Levels: 'DEBUG', 'INFO', 'WARN', 'ERROR' logTypes: process.env.LOG_TYPES || 'server,request,worker,database,model,system', logToConsole: process.env.LOG_CONSOLE !== 'false', // Default to true unless explicitly set to false logToFile: process.env.LOG_FILE === 'true' || false, // Default to false unless explicitly set to true logPath: process.env.LOG_PATH || 'logs/', - workerHealthCheckTimeout: - parseInt( - process.env.WORKER_HEALTH_CHECK_TIMEOUT || process.env.MODEL_LOADING_TIMEOUT || '20', - 10, - ) * 1_000, // Maximum time to wait for individual worker health check response (seconds to ms) - systemStartupTimeout: - parseInt(process.env.SYSTEM_STARTUP_TIMEOUT || process.env.MODEL_LOADING_TIME || '300', 10) * - 1_000, // Maximum time to wait for all worker pools to become ready at startup (seconds to ms) - maxWorkerRetries: parseInt(process.env.MAX_WORKER_RETRIES || '3', 10), // Maximum worker restart attempts before escalating to ERROR - workerRetryWindow: parseInt(process.env.WORKER_RETRY_WINDOW || '300', 10) * 1_000, // Time window in seconds to reset retry count (converted to ms) jobMaxRetries: parseInt(process.env.JOB_MAX_RETRIES || '3', 10), // Maximum number of retries for a job before failing it 
maxOllamaRetries: parseInt(process.env.MAX_OLLAMA_RETRIES || '5', 10), // Maximum model pull retry attempts ollamaRetryDelay: parseInt(process.env.OLLAMA_RETRY_DELAY || '30', 10) * 1_000, // Base delay between retries in seconds (converted to ms) diff --git a/src/competence-matcher/src/worker/worker-manager.ts b/src/competence-matcher/src/worker/worker-manager.ts index 9ce76090b..160ecd911 100644 --- a/src/competence-matcher/src/worker/worker-manager.ts +++ b/src/competence-matcher/src/worker/worker-manager.ts @@ -1,443 +1,277 @@ import { Worker } from 'worker_threads'; import { config } from '../config'; import { createWorker } from '../utils/worker'; -import { EmbeddingJob, JobQueueItem, MatchingJob, workerTypes } from '../utils/types'; +import { EmbeddingJob, MatchingJob, JobQueueItem, workerTypes } from '../utils/types'; import { WorkerError } from '../utils/errors'; import { getLogger } from '../utils/logger'; import { addReason } from '../tasks/reason'; import { getDB } from '../utils/db'; -const { - embeddingWorkers, - matchingWorkers, - workerHeartbeatInterval, - workerDeathTimeout, - jobMaxRetries, -} = config; +const { workerHeartbeatInterval, workerDeathTimeout, jobMaxRetries } = config; const logger = getLogger(); +// Recommended heartbeat strategy values (can be tuned via env): +// heartbeat interval: workerHeartbeatInterval (e.g. 30-60s) +// death timeout: workerDeathTimeout (e.g. 240s) => ~4-8x interval + +interface SingleJobItem extends JobQueueItem { + startedAt?: number; + queueDepthAtStart?: number; + meta?: { + kind: 'embedding' | 'matching'; + subtaskErrors: number; + reasonMatches: number; + reasonedCount: number; + }; +} + /** - * WorkerPool - Manages workers for a specific task type + * SingleWorkerManager - streamlined mono-worker queue for inference. + * Keeps public API: enqueue(job, type) + shutdown(). All computation lives in one worker. 
*/ -class WorkerPool { - private workers: Worker[] = []; - private availableWorkers: Set = new Set(); - private busyWorkers: Map = new Map(); - private activeJobs: Map = new Map(); // Track jobs being processed - private jobQueue: JobQueueItem[] = []; - private workerDeathTimers: Map = new Map(); - private workersBeingReplaced: Set = new Set(); // Prevent double replacement using threadId - private readonly workerType: workerTypes; - private readonly poolSize: number; - private readonly maxJobRetries: number; - private readonly workerInitData?: Record; - - constructor(workerType: workerTypes, poolSize: number, workerInitData?: Record) { - this.workerType = workerType; - this.poolSize = poolSize; - this.maxJobRetries = Math.max(0, jobMaxRetries); - this.workerInitData = workerInitData; - - logger.info('worker', `Initializing ${workerType} pool with ${poolSize} workers`, { - workerType, - poolSize, - }); - - this.initialiseWorkers(); +class WorkerManager { + private worker: Worker | null = null; + private queue: SingleJobItem[] = []; + private current: SingleJobItem | null = null; + private lastHeartbeat = Date.now(); + private heartbeatMonitor: NodeJS.Timeout | null = null; + private shuttingDown = false; - logger.debug('worker', `${workerType} pool initialization completed`, { - workerType, - totalWorkers: this.workers.length, - availableWorkers: this.availableWorkers.size, - busyWorkers: this.busyWorkers.size, + constructor() { + logger.info('worker', 'Initialising SingleWorkerManager (inference)', { + workerHeartbeatInterval, + workerDeathTimeout, + jobMaxRetries, }); + this.spawnWorker(); + this.startHeartbeatMonitor(); } - private initialiseWorkers(): void { - for (let i = 0; i < this.poolSize; i++) this.createWorker(); - } - - private createWorker(): void { - const script = this.workerType === 'inference' ? 
'inference' : this.workerType; - const worker = createWorker(script, this.workerInitData); - this.workers.push(worker); + private spawnWorker(): void { + this.worker = createWorker('inference'); + const threadId = this.worker.threadId; + logger.info('worker', 'Spawned inference worker', { threadId }); - logger.debug('worker', `Created new ${this.workerType} worker`, { - workerType: this.workerType, - threadId: worker.threadId, - totalWorkers: this.workers.length, - poolSize: this.poolSize, - availableWorkers: this.availableWorkers.size, - busyWorkers: this.busyWorkers.size, + this.worker.on('message', (msg: any) => this.handleMessage(msg)); + this.worker.on('error', (err) => this.handleWorkerCrash(err)); + this.worker.on('exit', (code) => { + this.handleWorkerCrash(new Error(`Worker exited with code ${code}`)); }); - - // Set initial death timer - const deathTimer = setTimeout(() => this.killWorker(worker), workerDeathTimeout); - this.workerDeathTimers.set(worker, deathTimer); - - worker.on('message', (message: any) => { - if (message.type === 'heartbeat') { - this.handleHeartbeat(worker); - } else if (message.type === 'log') { - // Handle log messages from workers - this.handleWorkerLog(message); - } - }); - - worker.on('error', (error) => this.handleWorkerFailure(worker, error)); - worker.once('exit', () => this.handleWorkerFailure(worker, new Error('Worker exited'))); } - private handleHeartbeat(worker: Worker): void { - logger.debug('worker', `Heartbeat received from ${this.workerType} worker`, { - workerType: this.workerType, - threadId: worker.threadId, - isBusy: this.busyWorkers.has(worker), - }); - - // Reset death timer - const existingTimer = this.workerDeathTimers.get(worker); - if (existingTimer) clearTimeout(existingTimer); - - const newTimer = setTimeout(() => this.killWorker(worker), workerDeathTimeout); - this.workerDeathTimers.set(worker, newTimer); - - // Mark available if not busy - if (!this.busyWorkers.has(worker)) { - 
this.availableWorkers.add(worker); - this.processNextJob(); - } + private startHeartbeatMonitor(): void { + if (this.heartbeatMonitor) clearInterval(this.heartbeatMonitor); + this.heartbeatMonitor = setInterval( + () => { + if (!this.worker) return; + const silenceMs = Date.now() - this.lastHeartbeat; + if (silenceMs > workerDeathTimeout) { + const activeJobId = this.current?.job.jobId; + logger.warn('worker', 'Heartbeat timeout – restarting worker', { + silenceMs, + workerDeathTimeout, + activeJobId, + queueLength: this.queue.length, + retryCount: this.current?.retryCount, + }); + this.respawnWorker(true); + } + }, + Math.max(10_000, workerHeartbeatInterval / 2), + ); } - private handleWorkerLog(message: any): void { - // Forward worker logs to the main logger - const { level, logType, message: logMessage, data, error } = message; - - if (error) { - const reconstructedError = new Error(error.message); - reconstructedError.stack = error.stack; - reconstructedError.name = error.name; - logger[level as 'debug' | 'info' | 'warn' | 'error']( - logType, - logMessage, - reconstructedError, - data, - ); + private respawnWorker(dueToTimeoutOrCrash = false): void { + if (this.worker) { + this.worker.removeAllListeners(); + try { + this.worker.terminate(); + } catch {} + } + this.worker = null; + // Requeue current job if any (unless shutting down) + if (dueToTimeoutOrCrash && this.current && !this.shuttingDown) { + const item = this.current; + this.current = null; + this.retryOrFail(item, new Error('Worker unresponsive / crashed')); } else { - logger[level as 'debug' | 'info' | 'warn' | 'error'](logType, logMessage, data); + this.current = null; } + this.spawnWorker(); + // Continue processing remaining queue + this.processNext(); } - private killWorker(worker: Worker): void { - // Prevent double replacement if worker is already being replaced - if (this.workersBeingReplaced.has(worker.threadId)) { - logger.debug( - 'worker', - `Skipping replacement for ${this.workerType} 
worker - already being replaced`, - { - workerType: this.workerType, - threadId: worker.threadId, - }, - ); - return; + private handleMessage(msg: any): void { + // Any message counts as liveness indicator + this.lastHeartbeat = Date.now(); + if (msg.type === 'heartbeat') { + logger.debug('worker', 'Heartbeat received', { threadId: this.worker?.threadId }); + return; // already updated timestamp } - - this.workersBeingReplaced.add(worker.threadId); - - logger.warn('worker', `Killing unresponsive ${this.workerType} worker (heartbeat timeout)`, { - workerType: this.workerType, - threadId: worker.threadId, - reason: 'heartbeat_timeout', - totalWorkersBefore: this.workers.length, - availableWorkersBefore: this.availableWorkers.size, - busyWorkersBefore: this.busyWorkers.size, - }); - - const activeJob = this.activeJobs.get(worker); - if (activeJob) { - this.activeJobs.delete(worker); - this.busyWorkers.delete(worker); - - logger.warn('worker', `Recovering job from unresponsive ${this.workerType} worker`, { - workerType: this.workerType, - threadId: worker.threadId, - jobId: activeJob.job.jobId, - retryCount: activeJob.retryCount, - }); - - this.handleJobFailure(activeJob, new Error('Worker heartbeat timeout')); + if (msg.type === 'log') { + // Forward log (avoid duplicating error stack later) + const { level, logType, message, data, error } = msg; + const logFn = + typeof level === 'string' && ['debug', 'info', 'warn', 'error'].includes(level) + ? 
(logger as any)[level].bind(logger) + : (logger as any).info.bind(logger); + if (error) { + const e = new Error(error.message); + e.stack = error.stack; + e.name = error.name; + logFn(logType, message, e, data); + } else { + logFn(logType, message, data); + } + return; } + if (!this.current) return; // Ignore job-specific messages if no active job + const jobId = this.current.job.jobId; - this.removeWorker(worker); - - logger.info('worker', `Creating replacement ${this.workerType} worker after timeout`, { - workerType: this.workerType, - killedThreadId: worker.threadId, - totalWorkersAfterRemoval: this.workers.length, - poolSize: this.poolSize, - }); - - this.createWorker(); - - logger.debug('worker', `Replacement ${this.workerType} worker creation completed`, { - workerType: this.workerType, - totalWorkersAfterReplacement: this.workers.length, - availableWorkersAfterReplacement: this.availableWorkers.size, - busyWorkersAfterReplacement: this.busyWorkers.size, - poolSize: this.poolSize, - }); - - this.workersBeingReplaced.delete(worker.threadId); - - this.processNextJob(); - } - - private handleWorkerFailure(worker: Worker, error: Error): void { - // Prevent double replacement if worker is already being replaced - if (this.workersBeingReplaced.has(worker.threadId)) { - logger.debug( - 'worker', - `Skipping replacement for failed ${this.workerType} worker - already being replaced`, - { - workerType: this.workerType, - threadId: worker.threadId, - error: error.message, - }, - ); + if (msg.type === 'error' && msg.jobId === jobId) { + // Worker reported a job-level error + logger.info('worker', 'Job error received', { jobId, message: msg.error }); + this.retryOrFail(this.current, new Error(msg.error)); return; } - - this.workersBeingReplaced.add(worker.threadId); - - logger.error('worker', `${this.workerType} worker failed, replacing worker`, { - // @ts-ignore - workerType: this.workerType, - threadId: worker.threadId, - error: error.message, - stack: error.stack, - 
totalWorkersBefore: this.workers.length, - availableWorkersBefore: this.availableWorkers.size, - busyWorkersBefore: this.busyWorkers.size, - hasActiveJob: this.activeJobs.has(worker), - }); - - const activeJob = this.activeJobs.get(worker); - if (activeJob) { - // Recover the job that was being processed - this.activeJobs.delete(worker); - this.busyWorkers.delete(worker); - logger.warn('worker', `Recovering job from failed ${this.workerType} worker`, { - workerType: this.workerType, - threadId: worker.threadId, - jobId: activeJob.job.jobId, - retryCount: activeJob.retryCount, + if (msg.type === 'job' && msg.job === 'reason') { + this.handleReasoning(this.current.job, msg.workload).catch((err) => { + logger.error('worker', 'Reasoning handling failed', err, { jobId }); }); - this.handleJobFailure(activeJob, error); + return; } - - this.removeWorker(worker); - - logger.info('worker', `Creating replacement ${this.workerType} worker after failure`, { - workerType: this.workerType, - failedThreadId: worker.threadId, - totalWorkersAfterRemoval: this.workers.length, - poolSize: this.poolSize, - }); - - this.createWorker(); - - logger.debug( - 'worker', - `Replacement ${this.workerType} worker creation completed after failure`, - { - workerType: this.workerType, - totalWorkersAfterReplacement: this.workers.length, - availableWorkersAfterReplacement: this.availableWorkers.size, - busyWorkersAfterReplacement: this.busyWorkers.size, - poolSize: this.poolSize, - }, - ); - - this.workersBeingReplaced.delete(worker.threadId); - - if (activeJob) { - this.processNextJob(); + if (msg.type === 'job_completed' && msg.jobId === jobId) { + const finished = this.current; + this.current = null; + const durationMs = Date.now() - (finished.startedAt || Date.now()); + logger.info('worker', 'Job completed', { + jobId, + durationMs, + retries: finished.retryCount, + }); + const summary = this.buildSummary(finished, durationMs); + finished.resolve(summary); + this.processNext(); + return; } } - 
private removeWorker(worker: Worker): void { - logger.debug('worker', `Removing ${this.workerType} worker from pool`, { - workerType: this.workerType, - threadId: worker.threadId, - wasAvailable: this.availableWorkers.has(worker), - wasBusy: this.busyWorkers.has(worker), - hadActiveJob: this.activeJobs.has(worker), - totalWorkersBefore: this.workers.length, - }); - - this.availableWorkers.delete(worker); - this.busyWorkers.delete(worker); - this.activeJobs.delete(worker); // Clean up active job tracking - - const timer = this.workerDeathTimers.get(worker); - if (timer) { - clearTimeout(timer); - this.workerDeathTimers.delete(worker); - } - - const index = this.workers.indexOf(worker); - if (index > -1) this.workers.splice(index, 1); - - // Remove all event listeners to prevent exit event from firing - worker.removeAllListeners(); - - // Clean up replacement tracking - this.workersBeingReplaced.delete(worker.threadId); - - worker.terminate(); - - logger.debug('worker', `Removed ${this.workerType} worker from pool`, { - workerType: this.workerType, - threadId: worker.threadId, - totalWorkersAfter: this.workers.length, - availableWorkersAfter: this.availableWorkers.size, - busyWorkersAfter: this.busyWorkers.size, - }); + private buildSummary(item: SingleJobItem, durationMs: number) { + const kind = item.meta?.kind; + return { + jobId: item.job.jobId, + kind, + retries: item.retryCount, + durationMs, + queueDepthAtStart: item.queueDepthAtStart, + workerThreadId: this.worker?.threadId, + matchStats: + kind === 'matching' + ? { + reasonedCount: item.meta?.reasonedCount || 0, + totalMatchesSeen: item.meta?.reasonMatches || 0, + } + : undefined, + embeddingStats: + kind === 'embedding' ? 
{ tasks: (item.job as EmbeddingJob).tasks.length } : undefined, + subtaskErrors: item.meta?.subtaskErrors || 0, + completedAt: new Date().toISOString(), + }; } - public async executeJob(job: EmbeddingJob | MatchingJob): Promise { - logger.info('worker', `Received ${this.workerType} job for queuing`, { - workerType: this.workerType, - jobId: job.jobId, - queueLength: this.jobQueue.length, - availableWorkers: this.availableWorkers.size, - }); - - return new Promise((resolve, reject) => { - this.jobQueue.push({ job, resolve, reject, retryCount: 0 }); - this.processNextJob(); + private processNext(): void { + if (this.current || this.shuttingDown) return; + if (!this.worker) return; // will resume after respawn + const next = this.queue.shift(); + if (!next) return; + this.current = next; + next.startedAt = Date.now(); + next.meta = next.meta || { + kind: this.detectKind(next.job), + subtaskErrors: 0, + reasonMatches: 0, + reasonedCount: 0, + }; + logger.info('worker', 'Starting job', { + jobId: next.job.jobId, + kind: next.meta.kind, + retryCount: next.retryCount, + queueRemaining: this.queue.length, }); + try { + this.worker.postMessage(next.job); + } catch (err) { + logger.error('worker', 'Failed to post job to worker (will retry)', err as Error, { + jobId: next.job.jobId, + }); + this.retryOrFail(next, err instanceof Error ? 
err : new Error(String(err))); + } } - private processing = false; - - private processNextJob(): void { - if (this.processing) return; // Prevent race conditions - if (this.jobQueue.length === 0 || this.availableWorkers.size === 0) return; - - this.processing = true; - const worker = this.availableWorkers.values().next().value as Worker; - const queueItem = this.jobQueue.shift()!; - - logger.debug('worker', `Processing next job in queue`, { - workerType: this.workerType, - queueLength: this.jobQueue.length, - availableWorkers: this.availableWorkers.size, - }); - - this.assignJobToWorker(worker, queueItem); - this.processing = false; + private detectKind(job: EmbeddingJob | MatchingJob): 'embedding' | 'matching' { + const maybeEmbedding = job as EmbeddingJob; + return (maybeEmbedding as EmbeddingJob).tasks && (maybeEmbedding as any).mode !== undefined + ? 'embedding' + : 'matching'; } - private assignJobToWorker(worker: Worker, queueItem: JobQueueItem): void { - const { job, resolve, reject, retryCount } = queueItem; - this.availableWorkers.delete(worker); - this.busyWorkers.set(worker, job.jobId); - this.activeJobs.set(worker, queueItem); // Track the active job - - logger.info('worker', `Starting ${this.workerType} job on worker`, { - workerType: this.workerType, + public async enqueue(job: EmbeddingJob | MatchingJob, workerType: workerTypes): Promise { + if (this.shuttingDown) throw new Error('WorkerManager shutting down'); + const kind = this.detectKind(job); + logger.info('worker', 'Enqueue job', { jobId: job.jobId, - threadId: worker.threadId, - retryCount, - queueLength: this.jobQueue.length, + workerType, + kind, + queueLengthBefore: this.queue.length, }); - - const messageHandler = (message: any) => { - if (message.type === 'job_completed' && message.jobId === job.jobId) { - worker.removeListener('message', messageHandler); - this.activeJobs.delete(worker); // Remove from active tracking - this.markWorkerAvailable(worker); - - logger.info('worker', 
`${this.workerType} job completed successfully`, { - workerType: this.workerType, - jobId: job.jobId, - threadId: worker.threadId, - }); - - resolve(message.result || 'Job completed'); - this.processNextJob(); - } else if (message.type === 'error' && message.jobId === job.jobId) { - worker.removeListener('message', messageHandler); - this.activeJobs.delete(worker); // Remove from active tracking - this.markWorkerAvailable(worker); - - logger.info('worker', `${this.workerType} job failed, handling failure`, { - workerType: this.workerType, - jobId: job.jobId, - threadId: worker.threadId, - error: message.error, - }); - - this.handleJobFailure(queueItem, new Error(message.error)); - this.processNextJob(); - } else if (message.type === 'job' && message.job === 'reason') { - this.handleReasoning(job, message); - } - }; - - worker.on('message', messageHandler); - worker.postMessage(job); - } - - private markWorkerAvailable(worker: Worker): void { - this.busyWorkers.delete(worker); - this.availableWorkers.add(worker); - } - - private handleJobFailure(queueItem: JobQueueItem, error: Error): void { - const { job, resolve, reject, retryCount } = queueItem; - - if (retryCount < this.maxJobRetries) { - // Requeue job with incremented retry count - logger.info('worker', `Retrying ${this.workerType} job`, { - workerType: this.workerType, - jobId: job.jobId, - retryCount: retryCount + 1, - maxRetries: this.maxJobRetries, - error: error.message, - }); - - this.jobQueue.unshift({ + return new Promise((resolve, reject) => { + const item: SingleJobItem = { job, resolve, reject, - retryCount: retryCount + 1, - }); - } else { - // Final failure after reaching retry limit - logger.error( - 'worker', - `${this.workerType} job failed permanently after max retries`, - new WorkerError(this.workerType, job.jobId, error), - ); + retryCount: 0, + queueDepthAtStart: this.queue.length, + }; + this.queue.push(item); + this.processNext(); + }); + } - reject(new WorkerError(this.workerType, 
job.jobId, error)); + private retryOrFail(item: SingleJobItem, err: Error): void { + const jobId = item.job.jobId; + if (item.retryCount < jobMaxRetries) { + item.retryCount += 1; + logger.info('worker', 'Retrying job', { jobId, retryCount: item.retryCount }); + // Put at front of queue + this.queue.unshift(item); + if (this.current === item) this.current = null; // ensure freed + this.processNext(); + } else { + logger.error('worker', 'Job failed permanently', new WorkerError('inference', jobId, err), { + jobId, + retries: item.retryCount, + }); + if (this.current === item) this.current = null; + item.reject(new WorkerError('inference', jobId, err)); + this.processNext(); } } - private async handleReasoning(job: any, message: any): Promise { - const finalMatches = []; - - for (const [task, matches] of Object.entries(message.workload)) { + private async handleReasoning(job: EmbeddingJob | MatchingJob, workload: Record) { + const finalMatches: any[] = []; + for (const [taskText, matches] of Object.entries(workload)) { try { - const taskMatches = await addReason(matches as any[], task); - finalMatches.push(...taskMatches); - } catch (error) { + const enriched = await addReason(matches as any[], taskText); + finalMatches.push(...enriched); + if (this.current?.meta) { + this.current.meta.reasonMatches += (matches as any[]).length; + this.current.meta.reasonedCount += enriched.filter((m) => m.reason).length; + } + } catch { finalMatches.push(...(matches as any[])); } } - const db = getDB(job.dbName); for (const match of finalMatches) { try { @@ -453,62 +287,47 @@ class WorkerPool { alignment: match.alignment, reason: match.reason, }); - } catch (error) { - // Continue on individual match save failure - } + } catch {} } - try { db.updateJobStatus(job.jobId, 'completed'); - } catch (error) { - // Log but don't fail - } - } - - public async shutdown(): Promise { - logger.info('worker', `Shutting down ${this.workerType} pool`, { - workerType: this.workerType, - activeJobs: 
this.activeJobs.size, - queuedJobs: this.jobQueue.length, - totalWorkers: this.workers.length, + } catch {} + logger.info('worker', 'Reasoning stored', { + jobId: job.jobId, + matchCount: finalMatches.length, }); - - this.jobQueue.forEach((item) => item.reject(new Error('Shutting down'))); - this.activeJobs.forEach((item) => item.reject(new Error('Shutting down'))); // Reject active jobs too - this.workerDeathTimers.forEach((timer) => clearTimeout(timer)); - this.workers.forEach((worker) => worker.terminate()); - - this.jobQueue.length = 0; - this.activeJobs.clear(); - this.workerDeathTimers.clear(); - this.workers.length = 0; - this.availableWorkers.clear(); - this.busyWorkers.clear(); } -} -/** - * WorkerManager - High-level interface for managing worker pools - */ -class WorkerManager { - private inferencePool: WorkerPool; - constructor() { - logger.info('worker', 'Initialising Monolithic WorkerManager (inference)', { - embeddingWorkers, - matchingWorkers, + private handleWorkerCrash(error: Error): void { + logger.error('worker', 'Worker crash detected – respawning', error, { + activeJobId: this.current?.job.jobId, }); - this.inferencePool = new WorkerPool('inference', 1); - } - public async enqueue(job: EmbeddingJob | MatchingJob, workerType: workerTypes): Promise { - logger.info('worker', `Enqueuing ${workerType} job (routed to inference)`, { - workerType, - jobId: job.jobId, - }); - return this.inferencePool.executeJob(job); + this.respawnWorker(true); } + public async shutdown(): Promise { - await this.inferencePool.shutdown(); + this.shuttingDown = true; + logger.info('worker', 'Shutting down worker manager', { + inFlight: this.current?.job.jobId, + queueLength: this.queue.length, + }); + // Reject queued jobs + for (const item of this.queue) item.reject(new Error('Shutting down')); + this.queue.length = 0; + if (this.current) { + this.current.reject(new Error('Shutting down')); + this.current = null; + } + if (this.worker) { + 
this.worker.removeAllListeners(); + try { + await this.worker.terminate(); + } catch {} + this.worker = null; + } + if (this.heartbeatMonitor) clearInterval(this.heartbeatMonitor); } } + const manager = new WorkerManager(); export default manager; From ed8bb1e69e7dd4dd8f00885f0dfe38dd5a559b91 Mon Sep 17 00:00:00 2001 From: Maxi <76120220+MaxiLein@users.noreply.github.com> Date: Mon, 24 Nov 2025 12:21:49 +0100 Subject: [PATCH 47/48] task vs match embedding --- .../src/worker/inference.ts | 10 ++++++-- .../src/worker/worker-manager.ts | 24 +++++++++++++++---- 2 files changed, 28 insertions(+), 6 deletions(-) diff --git a/src/competence-matcher/src/worker/inference.ts b/src/competence-matcher/src/worker/inference.ts index 376c7a2f2..8e67f290d 100644 --- a/src/competence-matcher/src/worker/inference.ts +++ b/src/competence-matcher/src/worker/inference.ts @@ -34,14 +34,20 @@ startHeartbeat('inference', config.workerHeartbeatInterval); parentPort.on('message', async (raw: any) => { const possible = raw as EmbeddingJob & MatchingJob; + const tasks: any[] | undefined = (possible as any).tasks; + const mode = (possible as any).mode; + // Classification logic: explicit mode or task shape for embedding const isEmbeddingJob = - (possible as EmbeddingJob).tasks && (possible as EmbeddingJob).mode !== undefined; + mode === 'task' || + mode === 'resource' || + (Array.isArray(tasks) && tasks.length > 0 && 'competenceId' in tasks[0] && 'text' in tasks[0]); const jobId = possible.jobId; workerLogger(jobId || 'system', 'debug', 'Inference worker received job', { threadId, jobId, + inferredMode: mode || null, kind: isEmbeddingJob ? 
'embedding' : 'matching', - taskCount: possible.tasks?.length || 0, + taskCount: tasks?.length || 0, }); try { if (isEmbeddingJob) await ensureEmbeddingInitialised(); diff --git a/src/competence-matcher/src/worker/worker-manager.ts b/src/competence-matcher/src/worker/worker-manager.ts index 160ecd911..9bf508639 100644 --- a/src/competence-matcher/src/worker/worker-manager.ts +++ b/src/competence-matcher/src/worker/worker-manager.ts @@ -210,10 +210,18 @@ class WorkerManager { } private detectKind(job: EmbeddingJob | MatchingJob): 'embedding' | 'matching' { - const maybeEmbedding = job as EmbeddingJob; - return (maybeEmbedding as EmbeddingJob).tasks && (maybeEmbedding as any).mode !== undefined - ? 'embedding' - : 'matching'; + // Explicit mode wins + const mode = (job as any).mode; + if (mode === 'task' || mode === 'resource') return 'embedding'; + // Heuristic: resource embedding tasks have competenceId & text + const tasks: any[] | undefined = (job as any).tasks; + if (Array.isArray(tasks) && tasks.length) { + const sample = tasks[0]; + if (sample && 'competenceId' in sample && 'text' in sample && 'type' in sample) { + return 'embedding'; + } + } + return 'matching'; } public async enqueue(job: EmbeddingJob | MatchingJob, workerType: workerTypes): Promise { @@ -260,6 +268,11 @@ class WorkerManager { private async handleReasoning(job: EmbeddingJob | MatchingJob, workload: Record) { const finalMatches: any[] = []; + const taskCount = Object.keys(workload).length; + logger.debug('worker', 'Reasoning workload received', { + jobId: job.jobId, + taskCount, + }); for (const [taskText, matches] of Object.entries(workload)) { try { const enriched = await addReason(matches as any[], taskText); @@ -294,7 +307,10 @@ class WorkerManager { } catch {} logger.info('worker', 'Reasoning stored', { jobId: job.jobId, + taskCount, + rawMatchGroups: Object.values(workload).reduce((sum, arr) => sum + (arr as any[]).length, 0), matchCount: finalMatches.length, + reasonedCount: 
finalMatches.filter((m) => m.reason).length, }); } From f903e4f9b9fc6efe9caf71bec655cd1cc3a57573 Mon Sep 17 00:00:00 2001 From: Maxi <76120220+MaxiLein@users.noreply.github.com> Date: Mon, 24 Nov 2025 12:37:53 +0100 Subject: [PATCH 48/48] keep deprecated settings for now --- src/competence-matcher/src/config.ts | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/competence-matcher/src/config.ts b/src/competence-matcher/src/config.ts index fe5117d2a..58eb40272 100644 --- a/src/competence-matcher/src/config.ts +++ b/src/competence-matcher/src/config.ts @@ -38,4 +38,9 @@ export const config = { alignmentDistanceThreshold: parseFloat(process.env.ALIGNMENT_DISTANCE_THRESHOLD || '0.65'), // Minimum distance required for alignment boost alignmentBoostMultiplier: parseFloat(process.env.ALIGNMENT_BOOST_MULTIPLIER || '1.2'), // Multiplier to boost distance for aligning matches neutralReductionMultiplier: parseFloat(process.env.NEUTRAL_REDUCTION_MULTIPLIER || '0.65'), // Multiplier to reduce distance for neutral matches + + // Deprecated settings + /* For multiworker part, can be removed, if we actually settle for */ + embeddingWorkers: parseInt(process.env.EMBEDDING_WORKERS || '1', 10), // Number of embedding workers + matchingWorkers: parseInt(process.env.MATCHING_WORKERS || '1', 10), // Number of matching workers };