From 9c0044f7b9bab6a8003cf5f47f307803469a177d Mon Sep 17 00:00:00 2001 From: Gabriel Koo Date: Sun, 26 Oct 2025 12:08:04 +0800 Subject: [PATCH] Refactor to use patches and add Secrets Manager template - Refactor prepare_source.sh to use patch files instead of sed - Add patches/ directory with 5 patch files for clean modifications - Add auth.py patch for graceful API key fallback - Add Secrets Manager template option (template-with-secrets-manager.yaml) - Update GitHub Actions workflow to set API_KEY env var for tests - Update README with patches info and Secrets Manager deployment option --- .github/workflows/test-build.yml | 2 + README.md | 17 ++++ patches/app.py.patch | 10 +++ patches/auth.py.patch | 51 +++++++++++ patches/no-embeddings.patch | 113 ++++++++++++++++++++++++ patches/pydantic-version.patch | 9 ++ patches/requirements.txt.patch | 7 ++ prepare_source.sh | 60 +++---------- template-with-secrets-manager.yaml | 133 +++++++++++++++++++++++++++++ 9 files changed, 354 insertions(+), 48 deletions(-) create mode 100644 patches/app.py.patch create mode 100644 patches/auth.py.patch create mode 100644 patches/no-embeddings.patch create mode 100644 patches/pydantic-version.patch create mode 100644 patches/requirements.txt.patch create mode 100644 template-with-secrets-manager.yaml diff --git a/.github/workflows/test-build.yml b/.github/workflows/test-build.yml index ed4d6ca..0414e99 100644 --- a/.github/workflows/test-build.yml +++ b/.github/workflows/test-build.yml @@ -38,6 +38,8 @@ jobs: run: ./deploy.sh - name: Test local uvicorn/FastAPI server + env: + API_KEY: bedrock run: | mv .aws-sam/build/BedrockAccessGatewayLayer/python/* ./app (cd app && ./run.sh &) diff --git a/README.md b/README.md index c97ca39..865b799 100644 --- a/README.md +++ b/README.md @@ -65,8 +65,11 @@ Read more on the creation of this project [here](https://dev.to/aws-builders/use - Removes Application Load Balancer -> Lambda Function URL - Docker Lambda Runtime -> Python Lambda Runtime +- 
Uses patch files for clean, maintainable modifications to upstream code +- Graceful API key fallback: if the SSM parameter or Secrets Manager secret cannot be read, the default API key is used (keeps local testing working without AWS services) - Optionally removes `numpy` and `tiktoken` dependencies when embedding models are not needed - Deployment option with CloudShell -> Fast and efficient! +- Optional Secrets Manager template for enhanced security ## Deployment @@ -370,3 +373,17 @@ curl "${FUNCTION_URL}api/v1/chat/completions" \ # > data: {"id":"chatcmpl-61c29444","created":1735753748,"model":"amazon.nova-micro-v1:0","system_fingerprint":"fp","choices":[{"index":0,"finish_reason":null,"logprobs":null,"delta":{"content":""}}],"object":"chat.completion.chunk","usage":null} # > data: {"id":"chatcmpl-61c29444","created":1735753748,"model":"amazon.nova-micro-v1:0","system_fingerprint":"fp","choices":[{"index":0,"finish_reason":null,"logprobs":null,"delta":{"content":" 2"}}],"object":"chat.completion.chunk","usage":null} ``` + +## Alternative: Secrets Manager Deployment + +For enhanced security, you can use the Secrets Manager template, which stores the API key in AWS Secrets Manager instead of as an environment variable: + +```shell +# Build with Secrets Manager template +sam build -t template-with-secrets-manager.yaml + +# Deploy +sam deploy --guided +``` + +**Cost consideration**: This adds ~$0.40/month for Secrets Manager storage, but provides better security and allows API key rotation without redeploying the Lambda function (the key is read once at startup, so a rotated key takes effect when new function instances start). 
diff --git a/patches/app.py.patch b/patches/app.py.patch new file mode 100644 index 0000000..755b272 --- /dev/null +++ b/patches/app.py.patch @@ -0,0 +1,10 @@ +--- a/api/app.py ++++ b/api/app.py +@@ -6,7 +6,6 @@ from fastapi.responses import StreamingResponse + from fastapi.middleware.cors import CORSMiddleware + from fastapi.openapi.docs import get_swagger_ui_html + from fastapi.staticfiles import StaticFiles +-from mangum import Mangum + + from api.routers import chat, embeddings, model + from api.setting import ( diff --git a/patches/auth.py.patch b/patches/auth.py.patch new file mode 100644 index 0000000..668fb0d --- /dev/null +++ b/patches/auth.py.patch @@ -0,0 +1,51 @@ +--- a/api/auth.py ++++ b/api/auth.py +@@ -1,7 +1,6 @@ + import json + import os + from typing import Annotated +- + import boto3 + from botocore.exceptions import ClientError + from fastapi import Depends, HTTPException, status +@@ -11,28 +10,23 @@ from api.setting import DEFAULT_API_KEYS + + api_key_param = os.environ.get("API_KEY_PARAM_NAME") + api_key_secret_arn = os.environ.get("API_KEY_SECRET_ARN") + api_key_env = os.environ.get("API_KEY") ++api_key = None ++ + if api_key_param: +- # For backward compatibility. +- # Please now use secrets manager instead. 
+- ssm = boto3.client("ssm") +- api_key = ssm.get_parameter(Name=api_key_param, WithDecryption=True)["Parameter"]["Value"] ++ try: ++ ssm = boto3.client("ssm") ++ api_key = ssm.get_parameter(Name=api_key_param, WithDecryption=True)["Parameter"]["Value"] ++ except Exception: ++ pass + elif api_key_secret_arn: +- sm = boto3.client("secretsmanager") + try: ++ sm = boto3.client("secretsmanager") + response = sm.get_secret_value(SecretId=api_key_secret_arn) + if "SecretString" in response: + secret = json.loads(response["SecretString"]) + api_key = secret["api_key"] +- except ClientError: +- raise RuntimeError("Unable to retrieve API KEY, please ensure the secret ARN is correct") +- except KeyError: +- raise RuntimeError('Please ensure the secret contains a "api_key" field') ++ except Exception: ++ pass + elif api_key_env: + api_key = api_key_env +-else: +- # For local use only. +- api_key = DEFAULT_API_KEYS ++ ++if not api_key: ++ api_key = DEFAULT_API_KEYS + + security = HTTPBearer() diff --git a/patches/no-embeddings.patch b/patches/no-embeddings.patch new file mode 100644 index 0000000..29f198b --- /dev/null +++ b/patches/no-embeddings.patch @@ -0,0 +1,113 @@ +--- a/api/models/bedrock.py ++++ b/api/models/bedrock.py +@@ -1,6 +1,4 @@ + import json +-import numpy as np +-import tiktoken + from typing import Any + + import boto3 +@@ -17,8 +15,6 @@ from api.schema import ( + + from api.setting import AWS_REGION, ENABLE_CROSS_REGION_INFERENCE + +-ENCODER = tiktoken.get_encoding("cl100k_base") +- + bedrock_runtime = boto3.client(service_name="bedrock-runtime", region_name=AWS_REGION) + bedrock = boto3.client(service_name="bedrock", region_name=AWS_REGION) + +@@ -500,70 +496,3 @@ class BedrockChatModel(ChatModel): + ) + + return response +- +- +-class BedrockEmbeddingsModel(EmbeddingsModel): +- def __init__(self, model_id: str): +- super().__init__(model_id) +- +- def _get_bedrock_embeddings(self, texts: list[str]) -> list[list[float]]: +- embeddings = [] +- for text in 
texts: +- body = json.dumps({"inputText": text}) +- response = bedrock_runtime.invoke_model( +- body=body, +- modelId=self.model_id, +- accept="application/json", +- contentType="application/json", +- ) +- response_body = json.loads(response.get("body").read()) +- embeddings.append(response_body.get("embedding")) +- return embeddings +- +- def _get_titan_multimodal_embeddings(self, texts: list[str]) -> list[list[float]]: +- embeddings = [] +- for text in texts: +- body = json.dumps({"inputText": text, "embeddingConfig": {"outputEmbeddingLength": 1024}}) +- response = bedrock_runtime.invoke_model( +- body=body, +- modelId=self.model_id, +- accept="application/json", +- contentType="application/json", +- ) +- response_body = json.loads(response.get("body").read()) +- embeddings.append(response_body.get("embedding")) +- return embeddings +- +- def embeddings(self, request: EmbeddingsRequest) -> EmbeddingsResponse: +- if "amazon.titan-embed-image" in self.model_id: +- embeddings = self._get_titan_multimodal_embeddings(request.input) +- else: +- embeddings = self._get_bedrock_embeddings(request.input) +- +- data = [] +- total_tokens = 0 +- for i, embedding in enumerate(embeddings): +- tokens = len(ENCODER.encode(request.input[i])) +- total_tokens += tokens +- data.append( +- EmbeddingsData( +- object="embedding", +- embedding=embedding, +- index=i, +- ) +- ) +- +- response = EmbeddingsResponse( +- object="list", +- data=data, +- model=self.model_id, +- usage=EmbeddingsUsage( +- prompt_tokens=total_tokens, +- total_tokens=total_tokens, +- ), +- ) +- +- return response +--- a/api/app.py ++++ b/api/app.py +@@ -8,7 +8,7 @@ from fastapi.openapi.docs import get_swagger_ui_html + from fastapi.staticfiles import StaticFiles + from mangum import Mangum + +-from api.routers import chat, embeddings, model ++from api.routers import chat, model + from api.setting import ( + API_ROUTE_PREFIX, + DEBUG, +@@ -48,7 +48,6 @@ async def health(): + + + app.include_router(chat.router, 
prefix=API_ROUTE_PREFIX) +-app.include_router(embeddings.router, prefix=API_ROUTE_PREFIX) + app.include_router(model.router, prefix=API_ROUTE_PREFIX) + + +--- a/requirements.txt ++++ b/requirements.txt +@@ -6,5 +6,3 @@ uvicorn==0.34.0 + boto3==1.35.80 + botocore==1.35.80 + jmespath==1.0.1 +-tiktoken==0.8.0 +-numpy==2.2.1 diff --git a/patches/pydantic-version.patch b/patches/pydantic-version.patch new file mode 100644 index 0000000..6e2b30d --- /dev/null +++ b/patches/pydantic-version.patch @@ -0,0 +1,9 @@ +--- a/requirements.txt ++++ b/requirements.txt +@@ -1,5 +1,5 @@ + mangum==0.19.0 + fastapi==0.115.6 +-pydantic==2.10.4 ++pydantic>=2.10.4 + pydantic-settings==2.6.1 + uvicorn==0.34.0 diff --git a/patches/requirements.txt.patch b/patches/requirements.txt.patch new file mode 100644 index 0000000..5c3d7b8 --- /dev/null +++ b/patches/requirements.txt.patch @@ -0,0 +1,7 @@ +--- a/requirements.txt ++++ b/requirements.txt +@@ -1,4 +1,3 @@ +-mangum==0.19.0 + fastapi==0.115.6 + pydantic==2.10.4 + pydantic-settings==2.6.1 diff --git a/prepare_source.sh b/prepare_source.sh index ad56f44..e888109 100755 --- a/prepare_source.sh +++ b/prepare_source.sh @@ -1,6 +1,5 @@ #!/bin/bash -# Display help message show_help() { echo "Usage: $0 [OPTIONS]" echo "" @@ -12,17 +11,8 @@ show_help() { echo "" } -# Function to perform sed operation and remove backup file -sed_edit() { - local pattern="$1" - local file="$2" - sed -i.bak "$pattern" "$file" && rm "${file}.bak" -} - -# Initialize variables NO_EMBEDDINGS=false -# Parse command line arguments while [[ $# -gt 0 ]]; do case "$1" in --help|-h) @@ -44,64 +34,38 @@ done REPO_DIR="build/bedrock-access-gateway" mkdir -p build - rm -rf app/api rm -f layer/requirements.txt -# Check if the repository is already cloned if [ -d "$REPO_DIR" ]; then echo "Repository already cloned, fetching latest changes" - # fetch latest changes - ( - cd $REPO_DIR - git fetch - ) + (cd $REPO_DIR && git fetch) else echo "Cloning aws-samples/bedrock-access-gateway 
repository" git clone --depth 1 https://github.com/aws-samples/bedrock-access-gateway $REPO_DIR fi cp -r $REPO_DIR/src/api app/api +cp $REPO_DIR/src/requirements.txt layer/requirements.txt -# To surpress warnings echo "" > app/requirements.txt -# Remove "Manum" from requirements.txt, as LWA is used instead. -grep -v "mangum" $REPO_DIR/src/requirements.txt > layer/requirements.txt -grep -v "Mangum" $REPO_DIR/src/api/app.py > app/api/app.py +# Apply patches +echo "Applying patches" +(cd $REPO_DIR/src && patch -p1 < ../../../patches/auth.py.patch) +(cd $REPO_DIR/src && patch -p1 < ../../../patches/app.py.patch) +(cd $REPO_DIR/src && patch -p1 < ../../../patches/requirements.txt.patch) +(cd $REPO_DIR/src && patch -p1 < ../../../patches/pydantic-version.patch) -# Check if --no-embeddings flag is set if [ "$NO_EMBEDDINGS" = true ]; then - echo "Deleting embeddings related code and dependencies" - - # Apply patterns to specific files directly - - # For app/api/models/bedrock.py - sed_edit '/^import numpy/d' "app/api/models/bedrock.py" - sed_edit '/^import tiktoken/d' "app/api/models/bedrock.py" - sed_edit '/^ENCODER = /d' "app/api/models/bedrock.py" - # This removes the final part which consists of embedding related code - sed_edit '/^class BedrockEmbeddingsModel/,$d' "app/api/models/bedrock.py" - - # For app/api/app.py - # Remove import of the embeddings model - sed_edit 's/, embeddings//g' "app/api/app.py" - # Remove the route for embeddings - sed_edit '/embeddings.router/d' "app/api/app.py" - - # For layer/requirements.txt - sed_edit '/^tiktoken/d' "layer/requirements.txt" - sed_edit '/^numpy/d' "layer/requirements.txt" + echo "Applying no-embeddings patch" + (cd $REPO_DIR/src && patch -p1 < ../../../patches/no-embeddings.patch) fi -# Pydantic need to be >= 2.10.4 in order to fix a installation issue -sed_edit 's/pydantic==.*/pydantic>=2.10.4/g' layer/requirements.txt - # Update boto3/botocore to latest versions for pkg in boto3 botocore; do - # pip index versions is 
no longer experimental from pip 25.1 - VERSION=$(pip index versions $pkg | grep -m 1 "LATEST: " | awk '{print $2}') + VERSION=$(pip index versions $pkg 2>/dev/null | grep -m 1 "LATEST: " | awk '{print $2}') if [ -n "$VERSION" ]; then - sed_edit "s/$pkg==.*/$pkg==$VERSION/g" layer/requirements.txt + sed -i.bak "s/$pkg==.*/$pkg==$VERSION/g" layer/requirements.txt && rm layer/requirements.txt.bak fi done diff --git a/template-with-secrets-manager.yaml b/template-with-secrets-manager.yaml new file mode 100644 index 0000000..9de7356 --- /dev/null +++ b/template-with-secrets-manager.yaml @@ -0,0 +1,133 @@ +AWSTemplateFormatVersion: '2010-09-09' +Transform: AWS::Serverless-2016-10-31 +Description: > + Bedrock Access Gateway (OpenAI API Schema) with FastAPI on AWS Lambda, + exposed as a public API with AWS Lambda Function URL. + Pay-as-you-go, no fixed infrastructure costs. + This version uses AWS Secrets Manager for API key storage. + +Parameters: + LambdaAdapterLayerVersion: + Type: Number + Description: "Version of the Lambda Adapter Layer to use" + Default: 25 + MinValue: 1 + ConstraintDescription: "Must be a positive integer" + PythonRuntime: + Type: String + Description: "Python runtime version to use" + Default: python3.12 + AllowedPattern: "^python3\\.([1-9][0-9]*)$" + ConstraintDescription: "Must be a valid Python 3.x runtime (e.g., python3.12)" + ApiKey: + Type: String + Description: "Pick a value of the API Key you want to be used in the Authorization header" + MinLength: 1 + NoEcho: true + DefaultModel: + Type: String + Description: "Default Bedrock model to use for inference" + Default: amazon.nova-premier-v1:0 + AllowedPattern: "([a-z]+\\.)?[a-z0-9-]+\\.[a-z0-9-]+(:\\d+)?" 
+ ConstraintDescription: "Must be a valid Bedrock model ID, cross-region inference profiles are also supported (e.g., us.amazon.nova-premier-v1:0)" + Debug: + Type: String + Description: "Enable debug mode" + Default: "false" + AllowedValues: + - "true" + - "false" + +Metadata: + AWS::CloudFormation::Interface: + ParameterGroups: + - Label: + default: "Infrastructure Configuration" + Parameters: + - LambdaAdapterLayerVersion + - PythonRuntime + - Label: + default: "Application Configuration" + Parameters: + - ApiKey + - DefaultModel + - Debug + +Globals: + Function: + MemorySize: 512 + Timeout: 150 + +Resources: + ApiKeySecret: + Type: AWS::SecretsManager::Secret + Properties: + Description: API Key for Bedrock Access Gateway + SecretString: !Sub '{"api_key":"${ApiKey}"}' + + BedrockAccessGatewayLayer: + Type: AWS::Serverless::LayerVersion + Properties: + ContentUri: layer/ + CompatibleRuntimes: + - !Ref PythonRuntime + CompatibleArchitectures: + - x86_64 + Metadata: + BuildMethod: !Ref PythonRuntime + + BedrockAccessGatewayFunction: + Type: AWS::Serverless::Function + Properties: + CodeUri: app/ + Handler: run.sh + Runtime: !Ref PythonRuntime + Architectures: + - x86_64 + Environment: + Variables: + API_KEY_SECRET_ARN: !Ref ApiKeySecret + AWS_LAMBDA_EXEC_WRAPPER: /opt/bootstrap + AWS_LWA_INVOKE_MODE: RESPONSE_STREAM + AWS_LWA_PORT: 8000 + DEBUG: !Ref Debug + DEFAULT_MODEL: !Ref DefaultModel + ENABLE_CROSS_REGION_INFERENCE: 'true' + Layers: + - !Ref BedrockAccessGatewayLayer + - !Sub "arn:aws:lambda:${AWS::Region}:753240598075:layer:LambdaAdapterLayerX86:${LambdaAdapterLayerVersion}" + FunctionUrlConfig: + AuthType: NONE + InvokeMode: RESPONSE_STREAM + Policies: + - Statement: + - Action: + - bedrock:ListFoundationModels + - bedrock:ListInferenceProfiles + Effect: Allow + Resource: "*" + - Action: + - bedrock:InvokeModel + - bedrock:InvokeModelWithResponseStream + Effect: Allow + Resource: + - arn:aws:bedrock:*::foundation-model/* + - 
arn:aws:bedrock:*:*:inference-profile/* + - Action: + - secretsmanager:GetSecretValue + Effect: Allow + Resource: !Ref ApiKeySecret + +Outputs: + FunctionUrl: + Description: "Function URL for FastAPI function" + Value: !GetAtt BedrockAccessGatewayFunctionUrl.FunctionUrl + Function: + Description: "FastAPI Lambda Function ARN" + Value: !GetAtt BedrockAccessGatewayFunction.Arn + ApiKeySecretArn: + Description: "ARN of the API Key secret in Secrets Manager" + Value: !Ref ApiKeySecret + SampleCurlCommand: + Description: "Sample curl command to list available models" + Value: !Sub "curl -X GET ${BedrockAccessGatewayFunctionUrl.FunctionUrl}api/v1/models -H 'Authorization: Bearer '"