Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions .github/workflows/test-build.yml
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,8 @@ jobs:
run: ./deploy.sh

- name: Test local uvicorn/FastAPI server
env:
API_KEY: bedrock
run: |
mv .aws-sam/build/BedrockAccessGatewayLayer/python/* ./app
(cd app && ./run.sh &)
Expand Down
17 changes: 17 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -65,8 +65,11 @@ Read more on the creation of this project [here](https://dev.to/aws-builders/use

- Removes Application Load Balancer -> Lambda Function URL
- Docker Lambda Runtime -> Python Lambda Runtime
- Uses patch files for clean, maintainable modifications to upstream code
- Graceful API key fallback (works in local testing without AWS services)
- Optionally removes `numpy` and `tiktoken` dependencies when embedding models are not needed
- Deployment option with CloudShell -> Fast and efficient!
- Optional Secrets Manager template for enhanced security

## Deployment

Expand Down Expand Up @@ -370,3 +373,17 @@ curl "${FUNCTION_URL}api/v1/chat/completions" \
# > data: {"id":"chatcmpl-61c29444","created":1735753748,"model":"amazon.nova-micro-v1:0","system_fingerprint":"fp","choices":[{"index":0,"finish_reason":null,"logprobs":null,"delta":{"content":""}}],"object":"chat.completion.chunk","usage":null}
# > data: {"id":"chatcmpl-61c29444","created":1735753748,"model":"amazon.nova-micro-v1:0","system_fingerprint":"fp","choices":[{"index":0,"finish_reason":null,"logprobs":null,"delta":{"content":" 2"}}],"object":"chat.completion.chunk","usage":null}
```

## Alternative: Secrets Manager Deployment

For enhanced security, you can use the Secrets Manager template, which stores the API key in AWS Secrets Manager instead of in an environment variable:

```shell
# Build with Secrets Manager template
sam build -t template-with-secrets-manager.yaml

# Deploy
sam deploy --guided
```

**Cost consideration**: This adds ~$0.40/month for Secrets Manager storage, but provides better security and allows API key rotation without redeploying the Lambda function.
10 changes: 10 additions & 0 deletions patches/app.py.patch
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
--- a/api/app.py
+++ b/api/app.py
@@ -6,7 +6,6 @@ from fastapi.responses import StreamingResponse
from fastapi.middleware.cors import CORSMiddleware
from fastapi.openapi.docs import get_swagger_ui_html
from fastapi.staticfiles import StaticFiles
-from mangum import Mangum

from api.routers import chat, embeddings, model
from api.setting import (
51 changes: 51 additions & 0 deletions patches/auth.py.patch
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
--- a/api/auth.py
+++ b/api/auth.py
@@ -1,7 +1,6 @@
import json
import os
from typing import Annotated
-
import boto3
from botocore.exceptions import ClientError
from fastapi import Depends, HTTPException, status
@@ -11,28 +10,23 @@ from api.setting import DEFAULT_API_KEYS

api_key_param = os.environ.get("API_KEY_PARAM_NAME")
api_key_secret_arn = os.environ.get("API_KEY_SECRET_ARN")
api_key_env = os.environ.get("API_KEY")
+api_key = None
+
if api_key_param:
- # For backward compatibility.
- # Please now use secrets manager instead.
- ssm = boto3.client("ssm")
- api_key = ssm.get_parameter(Name=api_key_param, WithDecryption=True)["Parameter"]["Value"]
+ try:
+ ssm = boto3.client("ssm")
+ api_key = ssm.get_parameter(Name=api_key_param, WithDecryption=True)["Parameter"]["Value"]
+ except Exception:
+ pass
elif api_key_secret_arn:
- sm = boto3.client("secretsmanager")
try:
+ sm = boto3.client("secretsmanager")
response = sm.get_secret_value(SecretId=api_key_secret_arn)
if "SecretString" in response:
secret = json.loads(response["SecretString"])
api_key = secret["api_key"]
- except ClientError:
- raise RuntimeError("Unable to retrieve API KEY, please ensure the secret ARN is correct")
- except KeyError:
- raise RuntimeError('Please ensure the secret contains a "api_key" field')
+ except Exception:
+ pass
elif api_key_env:
api_key = api_key_env
-else:
- # For local use only.
- api_key = DEFAULT_API_KEYS
+
+if not api_key:
+ api_key = DEFAULT_API_KEYS

security = HTTPBearer()
113 changes: 113 additions & 0 deletions patches/no-embeddings.patch
Original file line number Diff line number Diff line change
@@ -0,0 +1,113 @@
--- a/api/models/bedrock.py
+++ b/api/models/bedrock.py
@@ -1,6 +1,4 @@
import json
-import numpy as np
-import tiktoken
from typing import Any

import boto3
@@ -17,8 +15,6 @@ from api.schema import (

from api.setting import AWS_REGION, ENABLE_CROSS_REGION_INFERENCE

-ENCODER = tiktoken.get_encoding("cl100k_base")
-
bedrock_runtime = boto3.client(service_name="bedrock-runtime", region_name=AWS_REGION)
bedrock = boto3.client(service_name="bedrock", region_name=AWS_REGION)

@@ -500,70 +496,3 @@ class BedrockChatModel(ChatModel):
)

return response
-
-
-class BedrockEmbeddingsModel(EmbeddingsModel):
- def __init__(self, model_id: str):
- super().__init__(model_id)
-
- def _get_bedrock_embeddings(self, texts: list[str]) -> list[list[float]]:
- embeddings = []
- for text in texts:
- body = json.dumps({"inputText": text})
- response = bedrock_runtime.invoke_model(
- body=body,
- modelId=self.model_id,
- accept="application/json",
- contentType="application/json",
- )
- response_body = json.loads(response.get("body").read())
- embeddings.append(response_body.get("embedding"))
- return embeddings
-
- def _get_titan_multimodal_embeddings(self, texts: list[str]) -> list[list[float]]:
- embeddings = []
- for text in texts:
- body = json.dumps({"inputText": text, "embeddingConfig": {"outputEmbeddingLength": 1024}})
- response = bedrock_runtime.invoke_model(
- body=body,
- modelId=self.model_id,
- accept="application/json",
- contentType="application/json",
- )
- response_body = json.loads(response.get("body").read())
- embeddings.append(response_body.get("embedding"))
- return embeddings
-
- def embeddings(self, request: EmbeddingsRequest) -> EmbeddingsResponse:
- if "amazon.titan-embed-image" in self.model_id:
- embeddings = self._get_titan_multimodal_embeddings(request.input)
- else:
- embeddings = self._get_bedrock_embeddings(request.input)
-
- data = []
- total_tokens = 0
- for i, embedding in enumerate(embeddings):
- tokens = len(ENCODER.encode(request.input[i]))
- total_tokens += tokens
- data.append(
- EmbeddingsData(
- object="embedding",
- embedding=embedding,
- index=i,
- )
- )
-
- response = EmbeddingsResponse(
- object="list",
- data=data,
- model=self.model_id,
- usage=EmbeddingsUsage(
- prompt_tokens=total_tokens,
- total_tokens=total_tokens,
- ),
- )
-
- return response
--- a/api/app.py
+++ b/api/app.py
@@ -8,7 +8,7 @@ from fastapi.openapi.docs import get_swagger_ui_html
from fastapi.staticfiles import StaticFiles
from mangum import Mangum

-from api.routers import chat, embeddings, model
+from api.routers import chat, model
from api.setting import (
API_ROUTE_PREFIX,
DEBUG,
@@ -48,7 +48,6 @@ async def health():


app.include_router(chat.router, prefix=API_ROUTE_PREFIX)
-app.include_router(embeddings.router, prefix=API_ROUTE_PREFIX)
app.include_router(model.router, prefix=API_ROUTE_PREFIX)


--- a/requirements.txt
+++ b/requirements.txt
@@ -6,5 +6,3 @@ uvicorn==0.34.0
boto3==1.35.80
botocore==1.35.80
jmespath==1.0.1
-tiktoken==0.8.0
-numpy==2.2.1
9 changes: 9 additions & 0 deletions patches/pydantic-version.patch
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,5 +1,5 @@
mangum==0.19.0
fastapi==0.115.6
-pydantic==2.10.4
+pydantic>=2.10.4
pydantic-settings==2.6.1
uvicorn==0.34.0
7 changes: 7 additions & 0 deletions patches/requirements.txt.patch
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,4 +1,3 @@
-mangum==0.19.0
fastapi==0.115.6
pydantic==2.10.4
pydantic-settings==2.6.1
60 changes: 12 additions & 48 deletions prepare_source.sh
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
#!/bin/bash

# Display help message
show_help() {
echo "Usage: $0 [OPTIONS]"
echo ""
Expand All @@ -12,17 +11,8 @@ show_help() {
echo ""
}

# Apply a sed expression to a file in place, then delete the backup copy.
#   $1 - sed expression to run
#   $2 - path of the file to edit
# The backup "<file>.bak" is removed only when sed exits successfully,
# because the two commands are chained with &&.
# NOTE(review): the explicit ".bak" suffix is presumably there so the same
# invocation works with both GNU and BSD sed — confirm.
sed_edit() {
local pattern="$1"
local file="$2"
sed -i.bak "$pattern" "$file" && rm "${file}.bak"
}

# Initialize variables
NO_EMBEDDINGS=false

# Parse command line arguments
while [[ $# -gt 0 ]]; do
case "$1" in
--help|-h)
Expand All @@ -44,64 +34,38 @@ done
REPO_DIR="build/bedrock-access-gateway"

mkdir -p build

rm -rf app/api
rm -f layer/requirements.txt

# Check if the repository is already cloned
if [ -d "$REPO_DIR" ]; then
echo "Repository already cloned, fetching latest changes"
# fetch latest changes
(
cd $REPO_DIR
git fetch
)
(cd $REPO_DIR && git fetch)
else
echo "Cloning aws-samples/bedrock-access-gateway repository"
git clone --depth 1 https://github.com/aws-samples/bedrock-access-gateway $REPO_DIR
fi

cp -r $REPO_DIR/src/api app/api
cp $REPO_DIR/src/requirements.txt layer/requirements.txt

# To suppress warnings
echo "" > app/requirements.txt

# Remove "Mangum" from requirements.txt, as LWA is used instead.
grep -v "mangum" $REPO_DIR/src/requirements.txt > layer/requirements.txt
grep -v "Mangum" $REPO_DIR/src/api/app.py > app/api/app.py
# Apply patches
echo "Applying patches"
(cd $REPO_DIR/src && patch -p1 < ../../../patches/auth.py.patch)
(cd $REPO_DIR/src && patch -p1 < ../../../patches/app.py.patch)
(cd $REPO_DIR/src && patch -p1 < ../../../patches/requirements.txt.patch)
(cd $REPO_DIR/src && patch -p1 < ../../../patches/pydantic-version.patch)

# Check if --no-embeddings flag is set
if [ "$NO_EMBEDDINGS" = true ]; then
echo "Deleting embeddings related code and dependencies"

# Apply patterns to specific files directly

# For app/api/models/bedrock.py
sed_edit '/^import numpy/d' "app/api/models/bedrock.py"
sed_edit '/^import tiktoken/d' "app/api/models/bedrock.py"
sed_edit '/^ENCODER = /d' "app/api/models/bedrock.py"
# This removes the final part which consists of embedding related code
sed_edit '/^class BedrockEmbeddingsModel/,$d' "app/api/models/bedrock.py"

# For app/api/app.py
# Remove import of the embeddings model
sed_edit 's/, embeddings//g' "app/api/app.py"
# Remove the route for embeddings
sed_edit '/embeddings.router/d' "app/api/app.py"

# For layer/requirements.txt
sed_edit '/^tiktoken/d' "layer/requirements.txt"
sed_edit '/^numpy/d' "layer/requirements.txt"
echo "Applying no-embeddings patch"
(cd $REPO_DIR/src && patch -p1 < ../../../patches/no-embeddings.patch)
fi

# Pydantic needs to be >= 2.10.4 in order to fix an installation issue
sed_edit 's/pydantic==.*/pydantic>=2.10.4/g' layer/requirements.txt

# Update boto3/botocore to latest versions
for pkg in boto3 botocore; do
# `pip index versions` is no longer experimental as of pip 25.1
VERSION=$(pip index versions $pkg | grep -m 1 "LATEST: " | awk '{print $2}')
VERSION=$(pip index versions $pkg 2>/dev/null | grep -m 1 "LATEST: " | awk '{print $2}')
if [ -n "$VERSION" ]; then
sed_edit "s/$pkg==.*/$pkg==$VERSION/g" layer/requirements.txt
sed -i.bak "s/$pkg==.*/$pkg==$VERSION/g" layer/requirements.txt && rm layer/requirements.txt.bak
fi
done
Loading