diff --git a/.azure.env b/.azure.env new file mode 100644 index 000000000..003bdc8a3 --- /dev/null +++ b/.azure.env @@ -0,0 +1,5 @@ +GENAISCRIPT_MODEL_LARGE="azure:gpt-4o" +GENAISCRIPT_MODEL_SMALL="azure:gpt-4o-mini" +GENAISCRIPT_MODEL_EVAL="azure:gpt-4o" +GENAISCRIPT_MODEL_RULES="azure:gpt-4o" +GENAISCRIPT_MODEL_BASELINE="azure:gpt-4o" diff --git a/.github/instructions/implementation.instructions.md b/.github/instructions/implementation.instructions.md index b5d718983..8d32961ff 100644 --- a/.github/instructions/implementation.instructions.md +++ b/.github/instructions/implementation.instructions.md @@ -17,6 +17,9 @@ at runtime, rather than hardcoding them in your codebase. Whenever possible, use the original `.prompty` files from the `src/prompts` directory. +Always try to create minimal changes to the existing source code and make sure the generated code is compatible with the existing codebase. +Make changes in such a way that a developer will be able to understand and review the updates. + ## Phase 1: Test Generation PromptPex is a test generate framework for prompts. It is made of a graph of LLM transformations that eventually generate a set of @@ -24,7 +27,7 @@ inputs and expected outputs for a given prompt. - The core of the framework is documented in [Test Generation](docs/src/content/docs/reference/test-generation.md). -## Phase: Validate Test Generation +## Phase 2: Validate Test Generation Once you have implemented the test generation, you should validate it on a prompt. @@ -79,3 +82,7 @@ It is implemented using [GenAIScript](https://microsoft.github.io/genaiscript/). **Following the patterns and habits of the the target framework/language you are generating**. The reference implementation is a good starting point but you should adapt it to the target framework/language you are generating. + +## Instructions + +- Add comments in generated code explaining the source of the code in the PromptPex specification. 
\ No newline at end of file diff --git a/.github/workflows/release.sh b/.github/workflows/release.sh index 3b157ba7f..a4b455165 100644 --- a/.github/workflows/release.sh +++ b/.github/workflows/release.sh @@ -25,7 +25,7 @@ IMAGE_NAME="ghcr.io/microsoft/promptpex" echo "Building Docker image: $IMAGE_NAME:$NEW_VERSION" # Build the Docker image with version tag -docker build -t "$IMAGE_NAME:$NEW_VERSION" . +docker build -t "$IMAGE_NAME:$NEW_VERSION" . -f Dockerfile.serve # Tag with major version docker tag "$IMAGE_NAME:$NEW_VERSION" "$IMAGE_NAME:$MAJOR" @@ -40,10 +40,10 @@ docker logout ghcr.io echo "✅ Docker image pushed to GHCR: $IMAGE_NAME:$NEW_VERSION and $IMAGE_NAME:$MAJOR" # Update action.yml with new version -sed -i "s|image: .*|image: docker://$IMAGE_NAME:$NEW_VERSION|" action.yml -git add action.yml -git commit -m "[chore] upgrade image in action.yml" -git push origin HEAD +#sed -i "s|image: .*|image: docker://$IMAGE_NAME:$NEW_VERSION|" action.yml +#git add action.yml +#git commit -m "[chore] upgrade image in action.yml" +#git push origin HEAD # Step 4: Create GitHub release gh release create "$NEW_VERSION" --title "$NEW_VERSION" --notes "Patch release $NEW_VERSION" diff --git a/.gitignore b/.gitignore index 866cfed0e..cec9b9494 100644 --- a/.gitignore +++ b/.gitignore @@ -33,3 +33,4 @@ evals/explainer/ samples/github-models/**.prompty evals/demo/ evals/summarizer/ +.promptpex.env diff --git a/README.md b/README.md index 44617d9cd..b6d903ed8 100644 --- a/README.md +++ b/README.md @@ -3,20 +3,12 @@ > Test Generation for Prompts - [Read the documentation](https://microsoft.github.io/promptpex/) +- [PromptPex technical paper](http://arxiv.org/abs/2503.05070) **Prompts** are an important part of any software project that incorporates the power of AI models. As a result, tools to help developers create and maintain effective prompts are increasingly important. 
-- [Prompts Are Programs - ACM Blog Post](https://blog.sigplan.org/2024/10/22/prompts-are-programs/) - -**PromptPex** is a tool for exploring and testing AI model prompts. PromptPex is -intended to be used by developers who have prompts as part of their code base. -PromptPex treats a prompt as a function and automatically generates test inputs -to the function to support unit testing. - -- [PromptPex technical paper](http://arxiv.org/abs/2503.05070) - PromptPex provides the following capabilities: @@ -32,6 +24,18 @@ PromptPex provides the following capabilities: - PromptPex uses an LLM to automatically determine whether model outputs meet the specified requirements. - Automatically export the generated tests and rule-based evaluations to the OpenAI Evals API. +## Integrations + +- [GitHub Models Extension](https://github.com/github/gh-models/releases/tag/v0.0.25) + +## Running PromptPex + +The PromptPex tool runs dockerized with this command. + +```sh +docker run -p 8003:8003 ghcr.io/microsoft/promptpex:v0 +``` + ## Responsible AI Transparency Note Please reference [responsible-ai-transparency-note.md](./docs/src/content/docs/responsible-ai-transparency-note.md) for more information. diff --git a/action.yml b/action.yml index c8953601e..d9435f8dd 100644 --- a/action.yml +++ b/action.yml @@ -356,63 +356,55 @@ inputs: .prompty,.md,.txt,.json,.prompt.yml required: false debug: - description: Enable debug logging - (https://microsoft.github.io/genaiscript/reference/scripts/logging/). + description: Enable [debug + logging](https://microsoft.github.io/genaiscript/reference/scripts/logging/). 
+ required: false + model_alias: + description: "A YAML-like list of model aliases and model id: `translation: + github:openai/gpt-4o`" required: false openai_api_key: description: OpenAI API key required: false - default: ${{ secrets.OPENAI_API_KEY }} openai_api_base: description: OpenAI API base URL required: false - default: ${{ env.OPENAI_API_BASE }} azure_openai_api_endpoint: description: Azure OpenAI endpoint. In the Azure Portal, open your Azure OpenAI resource, Keys and Endpoints, copy Endpoint. required: false - default: ${{ env.AZURE_OPENAI_API_ENDPOINT }} azure_openai_api_key: description: Azure OpenAI API key. **You do NOT need this if you are using Microsoft Entra ID. required: false - default: ${{ secrets.AZURE_OPENAI_API_KEY }} azure_openai_subscription_id: description: Azure OpenAI subscription ID to list available deployments (Microsoft Entra only). required: false - default: ${{ env.AZURE_OPENAI_SUBSCRIPTION_ID }} azure_openai_api_version: description: Azure OpenAI API version. required: false - default: ${{ env.AZURE_OPENAI_API_VERSION }} azure_openai_api_credentials: description: Azure OpenAI API credentials type. Leave as 'default' unless you have a special Azure setup. 
required: false - default: ${{ env.AZURE_OPENAI_API_CREDENTIALS }} azure_ai_inference_api_key: description: Azure AI Inference key required: false - default: ${{ secrets.AZURE_AI_INFERENCE_API_KEY }} azure_ai_inference_api_endpoint: description: Azure Serverless OpenAI endpoint required: false - default: ${{ env.AZURE_AI_INFERENCE_API_ENDPOINT }} azure_ai_inference_api_version: description: Azure Serverless OpenAI API version required: false - default: ${{ env.AZURE_AI_INFERENCE_API_VERSION }} azure_ai_inference_api_credentials: description: Azure Serverless OpenAI API credentials type required: false - default: ${{ env.AZURE_AI_INFERENCE_API_CREDENTIALS }} github_token: - description: "GitHub token with `models: read` permission at least - (https://microsoft.github.io/genaiscript/reference/github-actions/#github\ - -models-permissions)." + description: "GitHub token with [models: + read](https://microsoft.github.io/genaiscript/reference/github-actions/#g\ + ithub-models-permissions) permission at least." required: false - default: ${{ secrets.GITHUB_TOKEN }} outputs: text: description: The generated text output. diff --git a/docs/src/content/docs/index.mdx b/docs/src/content/docs/index.mdx index d9d0854ed..b52ccc291 100644 --- a/docs/src/content/docs/index.mdx +++ b/docs/src/content/docs/index.mdx @@ -30,7 +30,10 @@ intended to be used by developers who have prompts as part of their code base. PromptPex treats a prompt as a function and automatically generates test inputs to the function to support unit testing. -