|
| 1 | +#!/bin/bash |
| 2 | +# This file orchestrates the workflow that ships the workload to the cloud. |
| 3 | +# Functionality: |
| 4 | +# 1) Checks resource availability on each cloud service. |
| 5 | +# 2) Deploys to a VM that is available; otherwise schedules for later or exits. |
| 6 | +# |
| 7 | +# Author: Abhishek Sriram <noobsiecoder@gmail.com> |
| 8 | +# Date: Aug 21st, 2025 |
| 9 | +# Place: Boston, MA |
set -e

# ==================== AWS credentials =====================
# TODO: AWS Credentials not used as quota wasn't increased yet (Recorded on: Aug 22nd, 2025)

# All settings below are read from the process environment.
# Bash has no "$secrets.NAME" lookup: that spelling expands the (unset) shell
# variable `secrets` and appends the literal text ".NAME", so every credential
# ended up as the string ".NAME". The caller (presumably a CI workflow that
# maps its secrets into environment variables -- confirm) must export each of
# these names before invoking the script. Unset names fall back to the empty
# string here; the check functions decide what is mandatory.

# ==================== Azure credentials ===================
AZURE_USERNAME="${AZURE_USERNAME:-}"
# NOTE(review): the password was originally sourced from the AZURE_APP_ID
# secret; that mapping is kept as a fallback -- confirm which secret really
# holds the service-principal password.
AZURE_PASSWORD="${AZURE_PASSWORD:-${AZURE_APP_ID:-}}"
AZURE_TENANT="${AZURE_TENANT:-}"
AZURE_RESOURCE_GROUP="${AZURE_RESOURCE_GROUP:-}"
AZURE_VM_INSTANCE="${AZURE_VM_INSTANCE:-}"

# =================== GCP credentials =====================
GCP_TYPE="${GCP_TYPE:-}"
GCP_PRIVATE_KEY_ID="${GCP_PRIVATE_KEY_ID:-}"
GCP_PROJECT_ID="${GCP_PROJECT_ID:-}"
GCP_PRIVATE_KEY="${GCP_PRIVATE_KEY:-}"
GCP_CLIENT_EMAIL="${GCP_CLIENT_EMAIL:-}"
GCP_CLIENT_ID="${GCP_CLIENT_ID:-}"
GCP_AUTH_URI="${GCP_AUTH_URI:-}"
GCP_TOKEN_URI="${GCP_TOKEN_URI:-}"
GCP_CERT="${GCP_CERT:-}"
GCP_CERT_URI="${GCP_CERT_URI:-}"
GCP_DOMAIN="${GCP_DOMAIN:-}"
# check_gcp reads these two names, but nothing in the script assigned them.
GCP_INSTANCE_NAME="${GCP_INSTANCE_NAME:-}"
GCP_INSTANCE_ZONE="${GCP_INSTANCE_ZONE:-}"

# The tilde is inside quotes, so it stays a literal "~" (no home-directory
# expansion happens on this machine).
FILE_ENTRYPOINT="~/VeriGenLLM-v2/main.py"
| 36 | + |
| 37 | +# TODO: Yet to work on AWS VMs -> Waiting on quota increase |
| 38 | +# Function to check AWS VM |
| 39 | + |
| 40 | + |
# Function to check AZURE VM
# TODO: Instead of the Python script, check the Dockerfile
#######################################
# Reports whether the Azure VM is free to take a new deployment by listing the
# Docker containers running on it through `az vm run-command invoke`.
# Globals (read):
#   AZURE_USERNAME, AZURE_PASSWORD, AZURE_TENANT,
#   AZURE_RESOURCE_GROUP, AZURE_VM_INSTANCE
# Outputs:
#   Human-readable status lines on stdout.
# Returns:
#   0 - VM is busy (VeriGenLLM-v2 or other application containers running)
#   1 - VM is available (no containers, or only system containers)
#   3 - status could not be determined (missing setting, login or remote
#       command failure, unrecognised output)
# "Available" is a non-zero status, so under `set -e` call this function only
# from a conditional context (`if`, `||`, ...).
#######################################
check_azure() {
  echo "Checking Azure VM for running Docker containers..."

  # Refuse to call Azure with blank arguments.
  local required
  for required in AZURE_USERNAME AZURE_PASSWORD AZURE_TENANT \
    AZURE_RESOURCE_GROUP AZURE_VM_INSTANCE; do
    if [[ -z "${!required:-}" ]]; then
      echo "✗ Missing Azure setting: $required"
      return 3
    fi
  done

  # Login to Azure. The status is checked explicitly so that a failed login is
  # reported instead of being silently ignored (or aborting via `set -e`).
  if ! az login --service-principal \
    -u "$AZURE_USERNAME" \
    -p "$AZURE_PASSWORD" \
    --tenant "$AZURE_TENANT" >/dev/null 2>&1; then
    echo "✗ Azure login failed"
    return 3
  fi

  # Script executed on the VM. Its first output line is a status marker:
  #   NO_CONTAINERS      - nothing is running
  #   NO_APP_CONTAINERS  - only excluded system containers are running
  #   VERIGEN_RUNNING    - a verigen/llm/rlft container is running
  #   OTHER_APPS_RUNNING - some other application container is running
  # Markers for busy states are followed by the matching container rows.
  local check_script='
    # Get list of running containers
    running_containers=$(docker ps --format "table {{.Names}}\t{{.Image}}\t{{.Status}}" 2>/dev/null | tail -n +2)

    if [ -z "$running_containers" ]; then
      echo "NO_CONTAINERS"
      exit 0
    fi

    # Check if any non-system containers are running
    # Exclude common system containers and daemons
    app_containers=$(echo "$running_containers" | grep -v -E "^(portainer|watchtower|traefik|nginx-proxy|docker-proxy|registry)" | grep -v -E "(daemon|system)")

    if [ -z "$app_containers" ]; then
      echo "NO_APP_CONTAINERS"
      exit 0
    fi

    # Check specifically for VeriGenLLM-v2 related containers
    verigen_containers=$(echo "$app_containers" | grep -i "verigen\|llm\|rlft")

    if [ ! -z "$verigen_containers" ]; then
      echo "VERIGEN_RUNNING"
      echo "$verigen_containers"
      exit 0
    fi

    # Other app containers are running
    echo "OTHER_APPS_RUNNING"
    echo "$app_containers"
    exit 0
  '

  # Declaration and assignment are separate: `local var=$(cmd)` would report
  # the status of `local` (always 0) and hide a failing az call.
  local output
  if ! output=$(az vm run-command invoke \
    -g "$AZURE_RESOURCE_GROUP" \
    -n "$AZURE_VM_INSTANCE" \
    --command-id RunShellScript \
    --scripts "$check_script" \
    --output json 2>&1); then
    echo "✗ Failed to execute command on Azure VM"
    echo "Error: $output"
    return 3
  fi

  # `jq -r` prints the message with real newlines. An unparsable response
  # leaves the message empty, which ends in the "unable to determine" branch.
  local message
  message=$(printf '%s' "$output" | jq -r '.value[0].message' 2>/dev/null) \
    || message=""

  # Keep only what the remote script wrote to stdout, i.e. the lines between
  # the "[stdout]" and "[stderr]" section headers of the message.
  local stdout_content
  stdout_content=$(printf '%s\n' "$message" | awk '
    /^\[stderr\]/ { capture = 0 }
    capture       { print }
    /^\[stdout\]/ {
      capture = 1
      rest = substr($0, 9)   # text that follows "[stdout]" on the same line
      if (rest != "") print rest
    }
  ')

  # First non-blank line is the marker; the remaining non-blank lines are the
  # container rows.
  local status details
  status=$(awk 'NF { print $1; exit }' <<<"$stdout_content")
  details=$(awk 'NF == 0 { next } !seen { seen = 1; next } { print }' \
    <<<"$stdout_content")

  # The marker is compared exactly, so a container whose name merely contains
  # a marker word cannot change the verdict.
  case "$status" in
    NO_CONTAINERS)
      echo "✓ No Docker containers running on Azure VM - VM is available"
      return 1 # VM available for deployment
      ;;
    NO_APP_CONTAINERS)
      echo "✓ No application containers running on Azure VM - VM is available"
      return 1 # VM available for deployment
      ;;
    VERIGEN_RUNNING)
      echo "✗ VeriGenLLM-v2 is already running on Azure VM"
      echo "Running containers:"
      echo "$details"
      return 0 # VM busy with our application
      ;;
    OTHER_APPS_RUNNING)
      echo "⚠ Other applications are running on Azure VM"
      echo "Running containers:"
      echo "$details"
      return 0 # VM busy with other applications
      ;;
    *)
      echo "✗ Unable to determine Azure VM status"
      return 3 # Unknown status
      ;;
  esac
}
| 132 | + |
# Function to check GCP VM
# TODO: Instead of the Python script, check the Dockerfile
#######################################
# Reports whether the entrypoint script is running on the GCP VM by running
# `pgrep` there through `gcloud compute ssh`.
# Globals (read):
#   GCP_INSTANCE_NAME, GCP_INSTANCE_ZONE, FILE_ENTRYPOINT, GCP_TYPE,
#   GCP_PROJECT_ID, GCP_PRIVATE_KEY_ID, GCP_PRIVATE_KEY, GCP_CLIENT_EMAIL,
#   GCP_CLIENT_ID, GCP_AUTH_URI, GCP_TOKEN_URI, GCP_CERT, GCP_CERT_URI,
#   GCP_DOMAIN
# Globals (written):
#   GOOGLE_APPLICATION_CREDENTIALS - exported; path of the generated key file.
#   The file stays on disk so later steps can use it; the caller should remove
#   it when finished.
# Outputs:
#   Human-readable status lines on stdout.
# Returns:
#   0 - the script is running on the VM
#   1 - the script is not running
#   3 - the check could not be performed (missing setting, temp-file failure,
#       or gcloud/ssh exiting with a status other than 0 or 1)
# Non-zero statuses are expected results, so under `set -e` call this function
# only from a conditional context (`if`, `||`, ...).
#######################################
check_gcp() {
  # pgrep -f matches against the full command line of each process, which
  # does not contain a literal "~", so a leading "~/" is dropped from the
  # pattern.
  local pattern=${FILE_ENTRYPOINT:-}
  pattern=${pattern#"~/"}

  if [[ -z "${GCP_INSTANCE_NAME:-}" || -z "${GCP_INSTANCE_ZONE:-}" ]]; then
    echo "✗ GCP_INSTANCE_NAME and GCP_INSTANCE_ZONE must be set"
    return 3
  fi
  if [[ -z "$pattern" ]]; then
    # An empty pattern would match every process on the VM.
    echo "✗ FILE_ENTRYPOINT must be set"
    return 3
  fi

  # A JSON string may not contain raw line breaks. Keys stored with literal
  # "\n" sequences are already valid; real newlines are turned into "\n"
  # (and carriage returns dropped) so both spellings give a parseable file.
  local private_key_json=${GCP_PRIVATE_KEY:-}
  private_key_json=${private_key_json//$'\r'/}
  private_key_json=${private_key_json//$'\n'/\\n}

  # mktemp gives an unpredictable name and owner-only permissions, unlike a
  # fixed, world-readable path in /tmp.
  local credentials_file
  if ! credentials_file=$(mktemp "${TMPDIR:-/tmp}/gcp-secret.XXXXXX"); then
    echo "✗ Could not create a temporary credentials file"
    return 3
  fi

  cat >"$credentials_file" <<EOF
{
  "type": "${GCP_TYPE:-}",
  "project_id": "${GCP_PROJECT_ID:-}",
  "private_key_id": "${GCP_PRIVATE_KEY_ID:-}",
  "private_key": "$private_key_json",
  "client_email": "${GCP_CLIENT_EMAIL:-}",
  "client_id": "${GCP_CLIENT_ID:-}",
  "auth_uri": "${GCP_AUTH_URI:-}",
  "token_uri": "${GCP_TOKEN_URI:-}",
  "auth_provider_x509_cert_url": "${GCP_CERT:-}",
  "client_x509_cert_url": "${GCP_CERT_URI:-}",
  "universe_domain": "${GCP_DOMAIN:-}"
}
EOF

  # NOTE(review): gcloud commands normally authenticate from gcloud's own
  # credential store (`gcloud auth activate-service-account --key-file=...`)
  # rather than from this variable -- confirm gcloud is authenticated before
  # this function runs.
  export GOOGLE_APPLICATION_CREDENTIALS="$credentials_file"

  # Run command on VM. Declaration and assignment are separate so the real
  # exit status is captured; `local var=$(cmd)` always reports 0.
  local output status=0
  output=$(gcloud compute ssh "$GCP_INSTANCE_NAME" \
    --zone="$GCP_INSTANCE_ZONE" \
    --command="pgrep -af '$pattern'" \
    --ssh-flag="-o ConnectTimeout=10" \
    2>&1) || status=$?

  if ((status == 0)) && [[ -n "$output" ]]; then
    echo "✓ Script is running"
    echo "Process info: $output"
    return 0
  fi

  # pgrep itself exits 1 when nothing matches; anything above 1 means the
  # check did not complete, so its output is an error text, not a process.
  if ((status > 1)); then
    echo "✗ Could not check the GCP VM (exit status $status)"
    echo "Error: $output"
    return 3
  fi

  echo "✗ Script is not running"
  if [[ -n "$output" ]]; then
    # Status 1 is also a generic failure status, so surface whatever was
    # printed.
    echo "Command output: $output"
  fi
  return 1
}
| 175 | + |
# Main Runner
# TODO: orchestration (availability checks followed by deployment) is not
# implemented yet; the body is a placeholder.
main() {
  # Bash rejects a function with an empty body (`main() {}` is a syntax error
  # that stops the whole script from parsing), so use the no-op builtin.
  :
}

main "$@" # Run main function
0 commit comments