eva-llm · schipiga · Apr 18, 2026 · Apr 18, 2026
diff --git a/README.md b/README.md
@@ -65,6 +65,15 @@ test:
       country: France
 ```
 
+### Output override
+
+```yml
+prompts:
+  - What is the capital of France?
+tests:
+  - output: Paris # Used in JQA tests: https://eva-llm.github.io/eva-run/#jqa-judge-quality-audit-metrology-mode
+```
+
 ### Asserts
 
 **NOTE!** All LLM asserts support natively [Dark Teaming](https://eva-llm.github.io/dark-teaming) to measure Epistemic Honesty via Symmetry Deviation, and extend Promptfoo format with field `must_fail`

diff --git a/docs/src/pages/index.md b/docs/src/pages/index.md
@@ -63,6 +63,15 @@ test:
       country: France
 ```
 
+### Output override
+
+```yml
+prompts:
+  - What is the capital of France?
+tests:
+  - output: Paris # Used in JQA tests: https://eva-llm.github.io/eva-run/#jqa-judge-quality-audit-metrology-mode
+```
+
 ### Asserts
 
 **NOTE!** All LLM asserts support natively [Dark Teaming](https://eva-llm.github.io/dark-teaming) to measure Epistemic Honesty via Symmetry Deviation, and extend Promptfoo format with field `must_fail`

diff --git a/package.json b/package.json
@@ -1,6 +1,6 @@
 {
   "name": "@eva-llm/eva-parser",
-  "version": "1.0.3",
+  "version": "1.0.4",
   "description": "A converter for Promptfoo test formats and into the EVA-LLM ecosystem",
   "main": "dst/index.js",
   "types": "dst/index.d.ts",

diff --git a/src/index.ts b/src/index.ts
@@ -4,12 +4,19 @@ import { parse } from 'yaml';
 import {
   ASSERT_NAMES,
   type TAssert,
+  type TEvaTest,
+  type TEvaTestWithPrompt,
   type TProviderObj,
+  type TTest,
   type TVercelOptions,
 } from './types';
 
 
-export * from './types';
+export {
+  ASSERT_NAMES,
+  type TTest,
+  type TAssert,
+} from './types';
 
 const parseProvider = (providerObj: string | TProviderObj) => {
   let options: TVercelOptions = {};
@@ -86,7 +93,7 @@ const parseAssert = (fooAssert: any): Omit<TAssert, 'criteria'> => {
 
 export function parsePromptfoo(yamlContent: string) {
   const promptfoo = parse(yamlContent);
-  const evaTests = [];
+  const evaTests: TEvaTest[] = [];
 
   for (const fooTest of promptfoo.tests || []) {
     for (let i = 0; i < (fooTest.times || 1); i++) {
@@ -95,11 +102,15 @@ export function parsePromptfoo(yamlContent: string) {
         continue;
       }
 
-      const evaTest = {
+      const evaTest: TEvaTest = {
         vars: fooTest.vars,
         asserts: [] as TAssert[],
       };
 
+      if (fooTest.output !== undefined) {
+        evaTest.output = fooTest.output;
+      };
+
       for (const fooAssert of fooTest.assert) {
         if (!Object.values(ASSERT_NAMES).includes(fooAssert.type)) {
           continue;
@@ -130,18 +141,24 @@ export function parsePromptfoo(yamlContent: string) {
     return [];
   }
 
-  const evaTestsWithPrompts = [];
+  const evaTestsWithPrompts: TEvaTestWithPrompt[] = [];
   for (const fooPrompt of promptfoo.prompts || []) {
     for (const evaTest of evaTests) {
 
-      evaTestsWithPrompts.push({
+      const evaTestWithPrompt: TEvaTestWithPrompt = {
         prompt: injectVars(fooPrompt, evaTest.vars),
         asserts: evaTest.asserts,
-      });
+      };
+
+      if (evaTest.output !== undefined) {
+        evaTestWithPrompt.output = evaTest.output;
+      }
+
+      evaTestsWithPrompts.push(evaTestWithPrompt);
     } 
   }
 
-  const finalTests = [];
+  const evaRunTasks: TTest[] = [];
   for (const providerObj of promptfoo.providers || []) {
     const parsedProvider = parseProvider(providerObj);
 
@@ -156,12 +173,20 @@ export function parsePromptfoo(yamlContent: string) {
         return assert;
       });
 
-      finalTests.push({
-        ...parsedProvider,
-        prompt: evaTestWithPrompt.prompt,
-        asserts,
-      });
+      if (evaTestWithPrompt.output !== undefined) {
+        evaRunTasks.push({
+          prompt: evaTestWithPrompt.prompt,
+          output: evaTestWithPrompt.output,
+          asserts,
+        });
+      } else {
+        evaRunTasks.push({
+          ...parsedProvider,
+          prompt: evaTestWithPrompt.prompt,
+          asserts,
+        });
+      }
     }
   }
-  return finalTests;
+  return evaRunTasks;
 }
diff --git a/src/types.ts b/src/types.ts
@@ -23,6 +23,30 @@ export type TAssert = {
   case_sensitive?: boolean;
 }
 
+export type TEvaTest = { // One parsed Promptfoo test, before it is combined with a prompt.
+  vars: undefined | Record<string, any>; // Taken from the test's `vars`; later injected into each prompt.
+  output?: string; // Set only when the Promptfoo test defines `output`.
+  asserts: TAssert[];
+}
+
+export type TEvaTestWithPrompt = { // A parsed test paired with one prompt.
+  prompt: string; // Prompt text after the test's vars have been injected.
+  output?: string; // Carried over from the test when it defines `output`.
+  asserts: TAssert[];
+}
+
+export type TTest = { // A task returned by parsePromptfoo; it takes one of two shapes.
+  prompt: string;
+  output: string; // The test supplied its own output, so no provider/model is attached to the task.
+  asserts: TAssert[];
+} | {
+  provider: string; // No output on the test: the task carries the parsed provider fields.
+  model: string;
+  options?: TVercelOptions;
+  prompt: string;
+  asserts: TAssert[];
+}
+
 export type TProviderObj = {
   id: string;
   config: Record<string, any>;

diff --git a/tests/index.test.ts b/tests/index.test.ts
@@ -417,8 +417,8 @@ tests:
         value: Hi
 `;
       const results = parsePromptfoo(yaml);
-      expect(results.find(r => r.provider === 'openai')).toBeDefined();
-      expect(results.find(r => r.provider === 'anthropic')).toBeDefined();
+      expect(results.find(r => 'provider' in r && r.provider === 'openai')).toBeDefined();
+      expect(results.find(r => 'provider' in r && r.provider === 'anthropic')).toBeDefined();
     });
   });
 
@@ -453,6 +453,162 @@ tests:
     });
   });
 
+  describe('test with output (JQA)', () => {
+    it('produces result with output and no provider/model when test has output', () => {
+      const yaml = `
+prompts: ['Say hi']
+providers: ['openai:gpt-4o']
+tests:
+  - output: "Hello there"
+    assert:
+      - type: contains
+        value: Hello
+`;
+      const results = parsePromptfoo(yaml);
+      expect(results).toHaveLength(1);
+      expect(results[0]).toMatchObject({
+        prompt: 'Say hi',
+        output: 'Hello there',
+      });
+      expect(results[0].asserts[0].criteria).toBe('Hello');
+      expect('provider' in results[0]).toBe(false);
+      expect('model' in results[0]).toBe(false);
+    });
+
+    it('produces result with provider/model and no output when test has no output', () => {
+      const yaml = `
+prompts: ['Say hi']
+providers: ['openai:gpt-4o']
+tests:
+  - assert:
+      - type: contains
+        value: Hello
+`;
+      const results = parsePromptfoo(yaml);
+      expect(results).toHaveLength(1);
+      expect('provider' in results[0] && results[0].provider).toBe('openai');
+      expect('model' in results[0] && results[0].model).toBe('gpt-4o');
+      expect('output' in results[0]).toBe(false);
+    });
+
+    it('handles mix of tests with and without output', () => {
+      const yaml = `
+prompts: ['Hello']
+providers: ['openai:gpt-4o']
+tests:
+  - output: "Pre-generated response"
+    assert:
+      - type: contains
+        value: response
+  - assert:
+      - type: contains
+        value: hi
+`;
+      const results = parsePromptfoo(yaml);
+      expect(results).toHaveLength(2);
+
+      const withOutput = results[0];
+      expect('output' in withOutput).toBe(true);
+      expect('provider' in withOutput).toBe(false);
+
+      const withoutOutput = results[1];
+      expect('output' in withoutOutput).toBe(false);
+      expect('provider' in withoutOutput && withoutOutput.provider).toBe('openai');
+    });
+
+    it('output test still inherits provider in asserts that lack one', () => {
+      const yaml = `
+prompts: ['Hello']
+providers: ['openai:gpt-4o']
+tests:
+  - output: "Some response"
+    assert:
+      - type: g-eval
+        value: The response is friendly
+`;
+      const results = parsePromptfoo(yaml);
+      expect(results[0].asserts[0].provider).toBe('openai');
+      expect(results[0].asserts[0].model).toBe('gpt-4o');
+    });
+
+    it('output test with assert-level provider keeps assert provider', () => {
+      const yaml = `
+prompts: ['Hello']
+providers: ['openai:gpt-4o']
+tests:
+  - output: "Some response"
+    assert:
+      - type: g-eval
+        value: The response is friendly
+        provider: anthropic:claude-3-5-sonnet
+`;
+      const results = parsePromptfoo(yaml);
+      expect(results[0].asserts[0].provider).toBe('anthropic');
+      expect(results[0].asserts[0].model).toBe('claude-3-5-sonnet');
+    });
+
+    it('output test with multiple providers produces one result per provider', () => {
+      const yaml = `
+prompts: ['Hello']
+providers:
+  - openai:gpt-4o
+  - anthropic:claude-3-5-sonnet
+tests:
+  - output: "Pre-generated"
+    assert:
+      - type: contains
+        value: generated
+`;
+      const results = parsePromptfoo(yaml);
+      // 2 providers × 1 prompt × 1 test = 2
+      expect(results).toHaveLength(2);
+      // Both should have output, no provider on the result
+      for (const r of results) {
+        expect('output' in r && r.output).toBe('Pre-generated');
+        expect('provider' in r).toBe(false);
+      }
+    });
+
+    it('output test with vars still injects vars into prompt', () => {
+      const yaml = `
+prompts: ['Tell me about {{topic}}']
+providers: ['openai:gpt-4o']
+tests:
+  - vars:
+      topic: dolphins
+    output: "Dolphins are amazing marine mammals"
+    assert:
+      - type: contains
+        value: dolphin
+`;
+      const results = parsePromptfoo(yaml);
+      expect(results[0].prompt).toBe('Tell me about dolphins');
+      expect('output' in results[0] && results[0].output).toBe('Dolphins are amazing marine mammals');
+    });
+
+    it('output test cross-product with multiple prompts', () => {
+      const yaml = `
+prompts:
+  - 'Prompt A'
+  - 'Prompt B'
+providers: ['openai:gpt-4o']
+tests:
+  - output: "Fixed response"
+    assert:
+      - type: contains
+        value: response
+`;
+      const results = parsePromptfoo(yaml);
+      // 1 provider × 2 prompts × 1 test = 2
+      expect(results).toHaveLength(2);
+      expect(results[0].prompt).toBe('Prompt A');
+      expect(results[1].prompt).toBe('Prompt B');
+      for (const r of results) {
+        expect('output' in r && r.output).toBe('Fixed response');
+      }
+    });
+  });
+
   describe('provider as object', () => {
     it('parses top-level provider object with temperature', () => {
       const yaml = `
@@ -467,9 +623,9 @@ tests:
         value: Hi
 `;
       const results = parsePromptfoo(yaml);
-      expect(results[0].provider).toBe('openai');
-      expect(results[0].model).toBe('gpt-4o');
-      expect(results[0].options).toEqual({ temperature: 0.5 });
+      expect('provider' in results[0] && results[0].provider).toBe('openai');
+      expect('model' in results[0] && results[0].model).toBe('gpt-4o');
+      expect('options' in results[0] && results[0].options).toEqual({ temperature: 0.5 });
     });
 
     it('parses top-level provider object without temperature', () => {
@@ -484,7 +640,7 @@ tests:
         value: Hi
 `;
       const results = parsePromptfoo(yaml);
-      expect(results[0].options).toEqual({});
+      expect('options' in results[0] && results[0].options).toEqual({});
     });
   });
 });