From e08fc0fabda0294f93234789e0e08f135a1d0d07 Mon Sep 17 00:00:00 2001 From: Kevin Collins Date: Wed, 25 Mar 2026 13:49:14 -0400 Subject: [PATCH 1/5] add new lightspeed bedrock doc --- content/_index.md | 1 + content/rosa/lightspeed-bedrock/index.md | 732 +++++++++++++++++++++++ 2 files changed, 733 insertions(+) create mode 100644 content/rosa/lightspeed-bedrock/index.md diff --git a/content/_index.md b/content/_index.md index cc798a00c..467c2b309 100644 --- a/content/_index.md +++ b/content/_index.md @@ -31,6 +31,7 @@ description: "Step-by-step tutorials from Red Hat experts to help you get the mo * [AWS Secrets Manager Secrets on ROSA HCP with External Secrets Operator and STS](/experts/rosa/eso) * [Configure Node Pool Scale-to-Zero on ROSA HCP](/experts/rosa/scale-to-zero) * [Install Portworx on Red Hat OpenShift Service on AWS with hosted control planes](/experts/rosa/rosa-hcp-portworx/) +* [Using OpenShift Lightspeed with AWS Bedrock on ROSA](/experts/rosa/lightspeed-bedrock/) #### Classic architecture diff --git a/content/rosa/lightspeed-bedrock/index.md b/content/rosa/lightspeed-bedrock/index.md new file mode 100644 index 000000000..fa210156b --- /dev/null +++ b/content/rosa/lightspeed-bedrock/index.md @@ -0,0 +1,732 @@ +--- +date: '2026-03-25' +title: Using OpenShift Lightspeed with AWS Bedrock on ROSA +tags: ["ROSA", "AWS", "HCP", "AI"] +authors: + - Kevin Collins +--- + +{{% alert state="info" %}}This guide has been validated on **OpenShift 4.20**. Operator CRD names, API versions, and console paths may differ on other versions.{{% /alert %}} + +OpenShift Lightspeed is an AI-powered assistant that helps developers and administrators interact with OpenShift using natural language. This guide walks you through integrating OpenShift Lightspeed with AWS Bedrock on Red Hat OpenShift Service on AWS (ROSA). 
+ +## Prerequisites + +* ROSA Cluster (4.20+) +* AWS CLI configured with appropriate credentials +* `oc` CLI logged in as cluster-admin +* `rosa` CLI +* An AWS account with access to Amazon Bedrock +* Access to a supported foundation model in Bedrock (e.g., Claude or Llama) + +## Architecture Overview + +OpenShift Lightspeed uses Large Language Models (LLMs) to provide intelligent assistance. By integrating with AWS Bedrock, you can leverage AWS-managed foundation models while keeping your OpenShift environment secure and compliant. + +The integration uses: +- **AWS Bedrock**: Provides the foundation models for AI inference +- **IRSA (IAM Roles for Service Accounts)**: Enables secure authentication from ROSA to AWS Bedrock +- **OpenShift Lightspeed Operator**: Manages the Lightspeed service on your cluster +- **Bedrock Proxy**: Translation layer that bridges Lightspeed with Bedrock (see below) + +### Bedrock Proxy Component + +The **bedrock-proxy** is a critical translation layer that enables OpenShift Lightspeed to communicate with AWS Bedrock. OpenShift Lightspeed is built to work with OpenAI-compatible APIs, but AWS Bedrock has its own unique API format. Rather than modifying Lightspeed itself, this lightweight proxy makes Bedrock "speak OpenAI" so they can communicate seamlessly. + +**What the Bedrock Proxy Does:** + +1. **API Translation** + - Receives OpenAI-format requests from Lightspeed (`/v1/chat/completions`) + - Translates them to Bedrock format and calls the appropriate model + +2. **Message Format Conversion** + - Amazon Nova doesn't support `system` role messages + - The proxy extracts system prompts and prepends them to the first user message + - Ensures only `user` and `assistant` roles are sent to Bedrock + +3. **Parameter Mapping** + - Converts OpenAI's `max_tokens` → Bedrock's `max_new_tokens` + - Transforms message structure from simple strings to Nova's `content: [{"text": "..."}]` format + +4. 
**Streaming Support** + - Converts Bedrock streaming responses to OpenAI-compatible Server-Sent Events (SSE) + - Reformats Bedrock's `contentBlockDelta` events into OpenAI's `delta` format + - Ensures Lightspeed receives responses in the expected streaming format + +5. **Authentication** + - Uses IRSA (IAM Roles for Service Accounts) for secure AWS authentication + - The pod's service account token is projected and used to assume the AWS IAM role + - No static credentials needed - all authentication is handled via the service account + +6. **Multi-Model Support** + - Handles both Claude and Amazon Nova models + - Automatically detects model type and applies appropriate format conversion + +## Enable Amazon Bedrock Access + +1. Enable model access in Amazon Bedrock + + Navigate to the AWS Bedrock console and enable access to your desired foundation model. For this guide, we'll use Anthropic Claude. + + ```bash + aws bedrock list-foundation-models --region us-east-1 \ + --query 'modelSummaries[?contains(modelId, `anthropic.claude`)].[modelId,modelName]' \ + --output table + ``` + +1. Request model access if needed + + If you don't have access to the model, request it through the AWS Bedrock console: + - Navigate to Amazon Bedrock → Model access + - Click "Request model access" + - Select the model(s) you want to use + - Submit the request + +## Configure IAM for Bedrock Access + +1. Set environment variables + + ```bash + export AWS_ACCOUNT_ID=$(aws sts get-caller-identity --query Account --output text) + export CLUSTER_NAME= + export AWS_REGION=$(rosa describe cluster -c ${CLUSTER_NAME} --output json | jq -r .region.id) + export BEDROCK_MODEL_ID=anthropic.claude-3-5-sonnet-20241022-v2:0 + export OIDC_ENDPOINT=$(rosa describe cluster -c ${CLUSTER_NAME} --output json | jq -r .aws.sts.oidc_endpoint_url | sed 's|^https://||') + export LIGHTSPEED_NAMESPACE=openshift-lightspeed + export SERVICE_ACCOUNT_NAME=lightspeed-service-account + ``` + +1. 
Create IAM policy for Bedrock access + + ```bash + BEDROCK_POLICY=$(cat < app.py <<'PYTHON_EOF' + from flask import Flask, request, Response, stream_with_context + import boto3 + import json + import os + + app = Flask(__name__) + bedrock = boto3.client('bedrock-runtime', region_name=os.environ.get('AWS_REGION', 'us-east-1')) + + @app.route('/v1/chat/completions', methods=['POST']) + def chat_completions(): + data = request.json + model = data.get('model', os.environ.get('BEDROCK_MODEL_ID')) + messages = data.get('messages', []) + + # Convert OpenAI format to Bedrock format + prompt = "\n".join([f"{m['role']}: {m['content']}" for m in messages]) + + body = json.dumps({ + "anthropic_version": "bedrock-2023-05-31", + "max_tokens": data.get('max_tokens', 4096), + "messages": [{"role": "user", "content": prompt}] + }) + + response = bedrock.invoke_model( + modelId=model, + body=body + ) + + response_body = json.loads(response['body'].read()) + + # Convert Bedrock response to OpenAI format + openai_response = { + "id": "chatcmpl-" + response['ResponseMetadata']['RequestId'], + "object": "chat.completion", + "created": int(response['ResponseMetadata']['HTTPHeaders']['date']), + "model": model, + "choices": [{ + "index": 0, + "message": { + "role": "assistant", + "content": response_body['content'][0]['text'] + }, + "finish_reason": response_body['stop_reason'] + }] + } + + return json.dumps(openai_response) + + if __name__ == '__main__': + app.run(host='0.0.0.0', port=8000) + PYTHON_EOF + + cat > requirements.txt <<'EOF' + flask==3.0.0 + boto3==1.34.0 + EOF + + cat > Dockerfile <<'EOF' + FROM registry.access.redhat.com/ubi9/python-39:latest + USER root + WORKDIR /app + COPY requirements.txt . + RUN pip install --no-cache-dir -r requirements.txt + COPY app.py . + USER 1001 + EXPOSE 8000 + CMD ["python", "app.py"] + EOF + ``` + +1. 
Expose the OpenShift internal image registry + + ```bash + oc patch configs.imageregistry.operator.openshift.io/cluster \ + --type merge \ + --patch '{"spec":{"defaultRoute":true}}' + ``` + +1. Get the OpenShift internal registry route + + ```bash + export REGISTRY=$(oc get route default-route -n openshift-image-registry -o jsonpath='{.spec.host}') + echo $REGISTRY + ``` + +1. Log in to the OpenShift internal registry + + ```bash + podman login -u $(oc whoami) -p $(oc whoami -t) $REGISTRY + ``` + +1. Build and tag the image for the internal registry + + {{% alert state="info" %}}If building on a Mac, specify the platform to ensure compatibility with OpenShift's x86_64 nodes.{{% /alert %}} + + ```bash + podman build --platform linux/amd64 -t $REGISTRY/${LIGHTSPEED_NAMESPACE}/bedrock-proxy:latest . + ``` + +1. Push the image to the OpenShift internal registry + + ```bash + podman push $REGISTRY/${LIGHTSPEED_NAMESPACE}/bedrock-proxy:latest + ``` + +1. Deploy the proxy + + ```bash + cat < Date: Wed, 25 Mar 2026 14:46:43 -0400 Subject: [PATCH 2/5] fixed tags --- content/rosa/lightspeed-bedrock/index.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/content/rosa/lightspeed-bedrock/index.md b/content/rosa/lightspeed-bedrock/index.md index fa210156b..88e95d9f9 100644 --- a/content/rosa/lightspeed-bedrock/index.md +++ b/content/rosa/lightspeed-bedrock/index.md @@ -1,7 +1,7 @@ --- date: '2026-03-25' title: Using OpenShift Lightspeed with AWS Bedrock on ROSA -tags: ["ROSA", "AWS", "HCP", "AI"] +tags: ["ROSA", "ROSA HCP", "Lightspeed"] authors: - Kevin Collins --- From 65248e543c884e27907859fd1c9ea22b2a7ad1e2 Mon Sep 17 00:00:00 2001 From: Kumudu Herath Date: Wed, 25 Mar 2026 23:40:11 -0700 Subject: [PATCH 3/5] Fix indentation for bedrock model configuration fixed OLSConfig custom resource's yaml indentation --- content/rosa/lightspeed-bedrock/index.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/content/rosa/lightspeed-bedrock/index.md 
b/content/rosa/lightspeed-bedrock/index.md index 88e95d9f9..b745580b5 100644 --- a/content/rosa/lightspeed-bedrock/index.md +++ b/content/rosa/lightspeed-bedrock/index.md @@ -514,8 +514,8 @@ The **bedrock-proxy** is a critical translation layer that enables OpenShift Lig credentialsSecretRef: name: bedrock-credentials models: - - name: ${BEDROCK_MODEL_ID} - url: http://bedrock-proxy.${LIGHTSPEED_NAMESPACE}.svc.cluster.local:8000/v1 + - name: ${BEDROCK_MODEL_ID} + url: http://bedrock-proxy.${LIGHTSPEED_NAMESPACE}.svc.cluster.local:8000/v1 ols: conversationCache: type: postgres From 385c03790c3e61906f94e34e06a6f2821e0ea51c Mon Sep 17 00:00:00 2001 From: Kumudu Herath Date: Thu, 26 Mar 2026 23:24:32 -0700 Subject: [PATCH 4/5] Update BEDROCK_MODEL_ID and enable streaming responses Updated the BEDROCK_MODEL_ID to a new version and modified the chat_completions function to support streaming responses. --- content/rosa/lightspeed-bedrock/index.md | 102 +++++++++++++++++------ 1 file changed, 75 insertions(+), 27 deletions(-) diff --git a/content/rosa/lightspeed-bedrock/index.md b/content/rosa/lightspeed-bedrock/index.md index b745580b5..f68314ddf 100644 --- a/content/rosa/lightspeed-bedrock/index.md +++ b/content/rosa/lightspeed-bedrock/index.md @@ -90,7 +90,7 @@ The **bedrock-proxy** is a critical translation layer that enables OpenShift Lig export AWS_ACCOUNT_ID=$(aws sts get-caller-identity --query Account --output text) export CLUSTER_NAME= export AWS_REGION=$(rosa describe cluster -c ${CLUSTER_NAME} --output json | jq -r .region.id) - export BEDROCK_MODEL_ID=anthropic.claude-3-5-sonnet-20241022-v2:0 + export BEDROCK_MODEL_ID=anthropic.claude-3-5-sonnet-20250219-v2:0 export OIDC_ENDPOINT=$(rosa describe cluster -c ${CLUSTER_NAME} --output json | jq -r .aws.sts.oidc_endpoint_url | sed 's|^https://||') export LIGHTSPEED_NAMESPACE=openshift-lightspeed export SERVICE_ACCOUNT_NAME=lightspeed-service-account @@ -333,6 +333,7 @@ The **bedrock-proxy** is a critical 
translation layer that enables OpenShift Lig import boto3 import json import os + import time app = Flask(__name__) bedrock = boto3.client('bedrock-runtime', region_name=os.environ.get('AWS_REGION', 'us-east-1')) @@ -342,40 +343,87 @@ The **bedrock-proxy** is a critical translation layer that enables OpenShift Lig data = request.json model = data.get('model', os.environ.get('BEDROCK_MODEL_ID')) messages = data.get('messages', []) + stream = data.get('stream', False) - # Convert OpenAI format to Bedrock format prompt = "\n".join([f"{m['role']}: {m['content']}" for m in messages]) - body = json.dumps({ "anthropic_version": "bedrock-2023-05-31", "max_tokens": data.get('max_tokens', 4096), "messages": [{"role": "user", "content": prompt}] }) - response = bedrock.invoke_model( - modelId=model, - body=body - ) - - response_body = json.loads(response['body'].read()) - - # Convert Bedrock response to OpenAI format - openai_response = { - "id": "chatcmpl-" + response['ResponseMetadata']['RequestId'], - "object": "chat.completion", - "created": int(response['ResponseMetadata']['HTTPHeaders']['date']), - "model": model, - "choices": [{ - "index": 0, - "message": { - "role": "assistant", - "content": response_body['content'][0]['text'] - }, - "finish_reason": response_body['stop_reason'] - }] - } - - return json.dumps(openai_response) + if stream: + # Streaming response + def generate(): + response = bedrock.invoke_model_with_response_stream( + modelId=model, + body=body + ) + + request_id = response['ResponseMetadata']['RequestId'] + + for event in response['body']: + chunk = json.loads(event['chunk']['bytes'].decode()) + + if chunk['type'] == 'content_block_delta': + # Send SSE chunk + sse_chunk = { + "id": f"chatcmpl-{request_id}", + "object": "chat.completion.chunk", + "created": int(time.time()), + "model": model, + "choices": [{ + "index": 0, + "delta": { + "content": chunk['delta']['text'] + }, + "finish_reason": None + }] + } + yield f"data: 
{json.dumps(sse_chunk)}\n\n" + + elif chunk['type'] == 'message_stop': + # Send final chunk + final_chunk = { + "id": f"chatcmpl-{request_id}", + "object": "chat.completion.chunk", + "created": int(time.time()), + "model": model, + "choices": [{ + "index": 0, + "delta": {}, + "finish_reason": chunk.get('stop_reason', 'stop') + }] + } + yield f"data: {json.dumps(final_chunk)}\n\n" + yield "data: [DONE]\n\n" + + return Response(stream_with_context(generate()), mimetype='text/event-stream') + + else: + # Non-streaming response (original code) + response = bedrock.invoke_model( + modelId=model, + body=body + ) + + response_body = json.loads(response['body'].read()) + + openai_response = { + "id": "chatcmpl-" + response['ResponseMetadata']['RequestId'], + "object": "chat.completion", + "created": int(time.time()), + "model": model, + "choices": [{ + "index": 0, + "message": { + "role": "assistant", + "content": response_body['content'][0]['text'] + }, + "finish_reason": response_body['stop_reason'] + }] + } + return json.dumps(openai_response) if __name__ == '__main__': app.run(host='0.0.0.0', port=8000) From 7a805409ea205d81aacd76052786d1c081451ba1 Mon Sep 17 00:00:00 2001 From: Kevin Collins Date: Fri, 27 Mar 2026 10:45:45 -0400 Subject: [PATCH 5/5] add author --- content/rosa/lightspeed-bedrock/index.md | 1 + 1 file changed, 1 insertion(+) diff --git a/content/rosa/lightspeed-bedrock/index.md b/content/rosa/lightspeed-bedrock/index.md index f68314ddf..edf3fdaaa 100644 --- a/content/rosa/lightspeed-bedrock/index.md +++ b/content/rosa/lightspeed-bedrock/index.md @@ -4,6 +4,7 @@ title: Using OpenShift Lightspeed with AWS Bedrock on ROSA tags: ["ROSA", "ROSA HCP", "Lightspeed"] authors: - Kevin Collins + - Kumudu Herath --- {{% alert state="info" %}}This guide has been validated on **OpenShift 4.20**. Operator CRD names, API versions, and console paths may differ on other versions.{{% /alert %}}