Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 9 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,15 @@ test:
country: France
```

### Output override

```yml
prompts:
- What is the capital of France?
test:
- output: Paris # Used in JQA tests: https://eva-llm.github.io/eva-run/#jqa-judge-quality-audit-metrology-mode
```

### Asserts

**NOTE!** All LLM asserts natively support [Dark Teaming](https://eva-llm.github.io/dark-teaming) to measure Epistemic Honesty via Symmetry Deviation, and extend the Promptfoo format with the `must_fail` field
Expand Down
9 changes: 9 additions & 0 deletions docs/src/pages/index.md
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,15 @@ test:
country: France
```

### Output override

```yml
prompts:
- What is the capital of France?
test:
- output: Paris # Used in JQA tests: https://eva-llm.github.io/eva-run/#jqa-judge-quality-audit-metrology-mode
```

### Asserts

**NOTE!** All LLM asserts natively support [Dark Teaming](https://eva-llm.github.io/dark-teaming) to measure Epistemic Honesty via Symmetry Deviation, and extend the Promptfoo format with the `must_fail` field
Expand Down
2 changes: 1 addition & 1 deletion package.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
"name": "@eva-llm/eva-parser",
"version": "1.0.3",
"version": "1.0.4",
"description": "A converter for Promptfoo test formats and into the EVA-LLM ecosystem",
"main": "dst/index.js",
"types": "dst/index.d.ts",
Expand Down
51 changes: 38 additions & 13 deletions src/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -4,12 +4,19 @@ import { parse } from 'yaml';
import {
ASSERT_NAMES,
type TAssert,
type TEvaTest,
type TEvaTestWithPrompt,
type TProviderObj,
type TTest,
type TVercelOptions,
} from './types';


export * from './types';
export {
ASSERT_NAMES,
type TTest,
type TAssert,
} from './types';

const parseProvider = (providerObj: string | TProviderObj) => {
let options: TVercelOptions = {};
Expand Down Expand Up @@ -86,7 +93,7 @@ const parseAssert = (fooAssert: any): Omit<TAssert, 'criteria'> => {

export function parsePromptfoo(yamlContent: string) {
const promptfoo = parse(yamlContent);
const evaTests = [];
const evaTests: TEvaTest[] = [];

for (const fooTest of promptfoo.tests || []) {
for (let i = 0; i < (fooTest.times || 1); i++) {
Expand All @@ -95,11 +102,15 @@ export function parsePromptfoo(yamlContent: string) {
continue;
}

const evaTest = {
const evaTest: TEvaTest = {
vars: fooTest.vars,
asserts: [] as TAssert[],
};

if (fooTest.output !== undefined) {
evaTest.output = fooTest.output;
};

for (const fooAssert of fooTest.assert) {
if (!Object.values(ASSERT_NAMES).includes(fooAssert.type)) {
continue;
Expand Down Expand Up @@ -130,18 +141,24 @@ export function parsePromptfoo(yamlContent: string) {
return [];
}

const evaTestsWithPrompts = [];
const evaTestsWithPrompts: TEvaTestWithPrompt[] = [];
for (const fooPrompt of promptfoo.prompts || []) {
for (const evaTest of evaTests) {

evaTestsWithPrompts.push({
const evaTestWithPrompt: TEvaTestWithPrompt = {
prompt: injectVars(fooPrompt, evaTest.vars),
asserts: evaTest.asserts,
});
};

if (evaTest.output !== undefined) {
evaTestWithPrompt.output = evaTest.output;
}

evaTestsWithPrompts.push(evaTestWithPrompt);
}
}

const finalTests = [];
const evaRunTasks: TTest[] = [];
for (const providerObj of promptfoo.providers || []) {
const parsedProvider = parseProvider(providerObj);

Expand All @@ -156,12 +173,20 @@ export function parsePromptfoo(yamlContent: string) {
return assert;
});

finalTests.push({
...parsedProvider,
prompt: evaTestWithPrompt.prompt,
asserts,
});
if (evaTestWithPrompt.output !== undefined) {
evaRunTasks.push({
prompt: evaTestWithPrompt.prompt,
output: evaTestWithPrompt.output,
asserts,
});
} else {
evaRunTasks.push({
...parsedProvider,
prompt: evaTestWithPrompt.prompt,
asserts,
});
}
}
}
return finalTests;
return evaRunTasks;
}
24 changes: 24 additions & 0 deletions src/types.ts
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,30 @@ export type TAssert = {
case_sensitive?: boolean;
}

/**
 * Intermediate test record built from a single Promptfoo test entry,
 * before it has been combined with a prompt.
 */
export type TEvaTest = {
  /** Variables taken from the test entry; `undefined` when the entry has none. */
  vars: Record<string, any> | undefined;
  /** Assertions collected for this test. */
  asserts: TAssert[];
  /** Output supplied by the test entry itself; absent when none was given. */
  output?: string;
};

/**
 * A test record that has been paired with a concrete prompt string.
 */
export type TEvaTestWithPrompt = {
  /** Prompt text for this test. */
  prompt: string;
  /** Assertions carried over from the underlying test. */
  asserts: TAssert[];
  /** Output carried over from the underlying test; absent when none was given. */
  output?: string;
};

/**
 * A single task in one of two shapes:
 *
 * - an "output" task, which carries a ready-made `output` string together with
 *   the prompt and asserts, and has no `provider` / `model` fields;
 * - a "provider" task, which names the `provider` and `model` (plus optional
 *   `options`) together with the prompt and asserts, and has no `output` field.
 *
 * The two shapes share no discriminant tag; consumers can tell them apart with
 * an `in` check such as `'provider' in task` or `'output' in task`.
 */
export type TTest =
  | {
      /** Prompt text for the task. */
      prompt: string;
      /** Output to evaluate the asserts against. */
      output: string;
      /** Assertions to evaluate. */
      asserts: TAssert[];
    }
  | {
      /** Provider identifier, e.g. `openai`. */
      provider: string;
      /** Model identifier, e.g. `gpt-4o`. */
      model: string;
      /** Optional provider options. */
      options?: TVercelOptions;
      /** Prompt text for the task. */
      prompt: string;
      /** Assertions to evaluate. */
      asserts: TAssert[];
    };

export type TProviderObj = {
id: string;
config: Record<string, any>;
Expand Down
168 changes: 162 additions & 6 deletions tests/index.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -417,8 +417,8 @@ tests:
value: Hi
`;
const results = parsePromptfoo(yaml);
expect(results.find(r => r.provider === 'openai')).toBeDefined();
expect(results.find(r => r.provider === 'anthropic')).toBeDefined();
expect(results.find(r => 'provider' in r && r.provider === 'openai')).toBeDefined();
expect(results.find(r => 'provider' in r && r.provider === 'anthropic')).toBeDefined();
});
});

Expand Down Expand Up @@ -453,6 +453,162 @@ tests:
});
});

// Suite covering tests that carry their own `output` field (the "JQA" case).
// Expectations pinned here: a result built from such a test exposes `output`
// and has no `provider` / `model` keys, while a test without `output` keeps
// the provider/model shape.
describe('test with output (JQA)', () => {
// Single output test: the result carries prompt + output and no provider/model keys.
it('produces result with output and no provider/model when test has output', () => {
const yaml = `
prompts: ['Say hi']
providers: ['openai:gpt-4o']
tests:
- output: "Hello there"
assert:
- type: contains
value: Hello
`;
const results = parsePromptfoo(yaml);
expect(results).toHaveLength(1);
expect(results[0]).toMatchObject({
prompt: 'Say hi',
output: 'Hello there',
});
expect(results[0].asserts[0].criteria).toBe('Hello');
expect('provider' in results[0]).toBe(false);
expect('model' in results[0]).toBe(false);
});

// Counterpart of the case above: without `output` the result keeps provider/model.
it('produces result with provider/model and no output when test has no output', () => {
const yaml = `
prompts: ['Say hi']
providers: ['openai:gpt-4o']
tests:
- assert:
- type: contains
value: Hello
`;
const results = parsePromptfoo(yaml);
expect(results).toHaveLength(1);
// `'provider' in …` narrows the union before the property is read.
expect('provider' in results[0] && results[0].provider).toBe('openai');
expect('model' in results[0] && results[0].model).toBe('gpt-4o');
expect('output' in results[0]).toBe(false);
});

// Both shapes may appear in one run; result order is expected to follow test order.
it('handles mix of tests with and without output', () => {
const yaml = `
prompts: ['Hello']
providers: ['openai:gpt-4o']
tests:
- output: "Pre-generated response"
assert:
- type: contains
value: response
- assert:
- type: contains
value: hi
`;
const results = parsePromptfoo(yaml);
expect(results).toHaveLength(2);

const withOutput = results[0];
expect('output' in withOutput).toBe(true);
expect('provider' in withOutput).toBe(false);

const withoutOutput = results[1];
expect('output' in withoutOutput).toBe(false);
expect('provider' in withoutOutput && withoutOutput.provider).toBe('openai');
});

// The result itself has no provider, but an assert without its own provider
// is still expected to pick up the top-level one.
it('output test still inherits provider in asserts that lack one', () => {
const yaml = `
prompts: ['Hello']
providers: ['openai:gpt-4o']
tests:
- output: "Some response"
assert:
- type: g-eval
value: The response is friendly
`;
const results = parsePromptfoo(yaml);
expect(results[0].asserts[0].provider).toBe('openai');
expect(results[0].asserts[0].model).toBe('gpt-4o');
});

// A provider declared on the assert itself takes precedence over the top-level provider.
it('output test with assert-level provider keeps assert provider', () => {
const yaml = `
prompts: ['Hello']
providers: ['openai:gpt-4o']
tests:
- output: "Some response"
assert:
- type: g-eval
value: The response is friendly
provider: anthropic:claude-3-5-sonnet
`;
const results = parsePromptfoo(yaml);
expect(results[0].asserts[0].provider).toBe('anthropic');
expect(results[0].asserts[0].model).toBe('claude-3-5-sonnet');
});

// Output tests are still expanded once per provider, even though the
// provider does not appear on the resulting objects.
it('output test with multiple providers produces one result per provider', () => {
const yaml = `
prompts: ['Hello']
providers:
- openai:gpt-4o
- anthropic:claude-3-5-sonnet
tests:
- output: "Pre-generated"
assert:
- type: contains
value: generated
`;
const results = parsePromptfoo(yaml);
// 2 providers × 1 prompt × 1 test = 2
expect(results).toHaveLength(2);
// Both should have output, no provider on the result
for (const r of results) {
expect('output' in r && r.output).toBe('Pre-generated');
expect('provider' in r).toBe(false);
}
});

// Variable substitution into the prompt is unaffected by the presence of `output`.
it('output test with vars still injects vars into prompt', () => {
const yaml = `
prompts: ['Tell me about {{topic}}']
providers: ['openai:gpt-4o']
tests:
- vars:
topic: dolphins
output: "Dolphins are amazing marine mammals"
assert:
- type: contains
value: dolphin
`;
const results = parsePromptfoo(yaml);
expect(results[0].prompt).toBe('Tell me about dolphins');
expect('output' in results[0] && results[0].output).toBe('Dolphins are amazing marine mammals');
});

// The same output is paired with every prompt; results are expected in prompt order.
it('output test cross-product with multiple prompts', () => {
const yaml = `
prompts:
- 'Prompt A'
- 'Prompt B'
providers: ['openai:gpt-4o']
tests:
- output: "Fixed response"
assert:
- type: contains
value: response
`;
const results = parsePromptfoo(yaml);
// 1 provider × 2 prompts × 1 test = 2
expect(results).toHaveLength(2);
expect(results[0].prompt).toBe('Prompt A');
expect(results[1].prompt).toBe('Prompt B');
for (const r of results) {
expect('output' in r && r.output).toBe('Fixed response');
}
});
});

describe('provider as object', () => {
it('parses top-level provider object with temperature', () => {
const yaml = `
Expand All @@ -467,9 +623,9 @@ tests:
value: Hi
`;
const results = parsePromptfoo(yaml);
expect(results[0].provider).toBe('openai');
expect(results[0].model).toBe('gpt-4o');
expect(results[0].options).toEqual({ temperature: 0.5 });
expect('provider' in results[0] && results[0].provider).toBe('openai');
expect('model' in results[0] && results[0].model).toBe('gpt-4o');
expect('options' in results[0] && results[0].options).toEqual({ temperature: 0.5 });
});

it('parses top-level provider object without temperature', () => {
Expand All @@ -484,7 +640,7 @@ tests:
value: Hi
`;
const results = parsePromptfoo(yaml);
expect(results[0].options).toEqual({});
expect('options' in results[0] && results[0].options).toEqual({});
});
});
});
Loading