From 6d004708c6cdcf780c241cb70cb41b57eeffa25e Mon Sep 17 00:00:00 2001
From: Robert Shelton
Date: Mon, 23 Jun 2025 13:36:27 -0400
Subject: [PATCH] update imports and readme

---
 README.md                                |     5 +-
 .../02_semantic_cache_optimization.ipynb |   315 +-
 .../01_routing_optimization.ipynb        | 14690 +++++++++++++++-
 3 files changed, 14700 insertions(+), 310 deletions(-)

diff --git a/README.md b/README.md
index 58b819a8..b85f36c2 100644
--- a/README.md
+++ b/README.md
@@ -98,7 +98,7 @@ An estimated 31% of LLM queries are potentially redundant ([source](https://arxi
 | --- | --- | --- |
 | 🧠 **Gemini Semantic Cache** - Build a semantic cache with Redis and Google Gemini | [![Open In GitHub](https://img.shields.io/badge/View-GitHub-green)](python-recipes/semantic-cache/00_semantic_caching_gemini.ipynb) | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/redis-developer/redis-ai-resources/blob/main/python-recipes/semantic-cache/00_semantic_caching_gemini.ipynb) |
 | 🦙 **Llama3.1 Doc2Cache** - Build a semantic cache using the Doc2Cache framework and Llama3.1 | [![Open In GitHub](https://img.shields.io/badge/View-GitHub-green)](python-recipes/semantic-cache/01_doc2cache_llama3_1.ipynb) | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/redis-developer/redis-ai-resources/blob/main/python-recipes/semantic-cache/01_doc2cache_llama3_1.ipynb) |
-| ⚙️ **Cache Optimization** - Use CacheThresholdOptimizer from redisvl to setup best cache config | [![Open In GitHub](https://img.shields.io/badge/View-GitHub-green)](python-recipes/semantic-cache/02_semantic_cache_optimization.ipynb) | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/redis-developer/redis-ai-resources/blob/main/python-recipes/semantic-cache/02_semantic_cache_optimization.ipynb) |
+| ⚙️ **Cache Optimization** - Use CacheThresholdOptimizer from [redis-retrieval-optimizer](https://pypi.org/project/redis-retrieval-optimizer/) to set up the best cache config | [![Open In GitHub](https://img.shields.io/badge/View-GitHub-green)](python-recipes/semantic-cache/02_semantic_cache_optimization.ipynb) | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/redis-developer/redis-ai-resources/blob/main/python-recipes/semantic-cache/02_semantic_cache_optimization.ipynb) |
 
 ### Semantic Routing
 Routing is a simple and effective way to prevent misuse of your AI application and to create branching logic between data sources.
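For context on the `CacheThresholdOptimizer` row above, the updated notebook walks through roughly this workflow. A minimal sketch, assuming a local Redis instance, the redisvl >= 0.6.0 import path for `SemanticCache`, and illustrative cache entries and test data (only the optimizer import and calls are taken verbatim from the patch):

```python
from redisvl.extensions.cache.llm import SemanticCache  # older redisvl: redisvl.extensions.llmcache
from redis_retrieval_optimizer.threshold_optimization import CacheThresholdOptimizer

# Cache with a deliberately rough starting threshold; the optimizer tunes it.
sem_cache = SemanticCache(
    name="demo_cache",
    redis_url="redis://localhost:6379",
    distance_threshold=0.5,
)
key = sem_cache.store(prompt="what is the capital of france?", response="paris")

# Labeled test queries: "query_match" is the cache key the query should hit,
# or an empty string when the cache should miss.
test_data = [
    {"query": "what's the capital city of france?", "query_match": key},
    {"query": "how tall is the eiffel tower?", "query_match": ""},
]

optimizer = CacheThresholdOptimizer(sem_cache, test_data, eval_metric="precision")
optimizer.optimize()
print(f"Distance threshold after: {sem_cache.distance_threshold}")
```

The optimizer searches for the distance threshold that maximizes the chosen metric (precision here) over the labeled queries and writes the winning value back onto the cache, which is why the notebook prints the threshold before and after.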
@@ -106,7 +106,7 @@ Routing is a simple and effective way to prevent misuse of your AI application
 | Recipe | GitHub | Google Colab |
 | --- | --- | --- |
 | 🔀 **Basic Routing** - Simple examples of how to build an allow/block list router in addition to a multi-topic router | [![Open In GitHub](https://img.shields.io/badge/View-GitHub-green)](python-recipes/semantic-router/00_semantic_routing.ipynb) | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/redis-developer/redis-ai-resources/blob/main/python-recipes/semantic-router/00_semantic_routing.ipynb) |
-| ⚙️ **Router Optimization** - Use RouterThresholdOptimizer from redisvl to setup best router config | [![Open In GitHub](https://img.shields.io/badge/View-GitHub-green)](python-recipes/semantic-router/01_routing_optimization.ipynb) | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/redis-developer/redis-ai-resources/blob/main/python-recipes/semantic-router/01_routing_optimization.ipynb) |
+| ⚙️ **Router Optimization** - Use RouterThresholdOptimizer from [redis-retrieval-optimizer](https://pypi.org/project/redis-retrieval-optimizer/) to set up the best router config | [![Open In GitHub](https://img.shields.io/badge/View-GitHub-green)](python-recipes/semantic-router/01_routing_optimization.ipynb) | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/redis-developer/redis-ai-resources/blob/main/python-recipes/semantic-router/01_routing_optimization.ipynb) |
 
 ### AI Gateways
 
@@ -203,6 +203,7 @@ Redis integrates with many different players in the AI ecosystem. Here's a curat
 - [Benchmarking results for vector databases](https://redis.io/blog/benchmarking-results-for-vector-databases/) - Benchmarking results for vector databases, including Redis and 7 other Vector Database players.
 - [Redis Vector Library Docs](https://docs.redisvl.com)
 - [Redis Vector Search API Docs](https://redis.io/docs/interact/search-and-query/advanced-concepts/vectors/) - Official Redis literature for Vector Similarity Search.
+- [Redis Retrieval Optimizer](https://pypi.org/project/redis-retrieval-optimizer/) - Library for optimizing index, embedding, and search method usage within Redis.
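The `RouterThresholdOptimizer` row follows the same pattern against a redisvl `SemanticRouter`. A sketch under stated assumptions: the optimizer is assumed to live in the same `redis_retrieval_optimizer.threshold_optimization` module as its cache counterpart, and the routes, references, and test data below are illustrative stand-ins for the notebook's faq/general/blocked setup:

```python
from redisvl.extensions.router import Route, SemanticRouter
# Assumed import path, by analogy with CacheThresholdOptimizer above.
from redis_retrieval_optimizer.threshold_optimization import RouterThresholdOptimizer

routes = [
    Route(
        name="faq",
        references=["how do I reset my password?", "where is my order history?"],
        distance_threshold=0.5,  # starting point; the optimizer tunes per-route thresholds
    ),
    Route(
        name="blocked",
        references=["give me another customer's home address"],
        distance_threshold=0.5,
    ),
]
router = SemanticRouter(
    name="ecom_router",
    routes=routes,
    redis_url="redis://localhost:6379",
)

# "query_match" holds the route each query should resolve to ("" = no route).
test_data = [
    {"query": "how can I change my password?", "query_match": "faq"},
    {"query": "what's the weather today?", "query_match": ""},
]

optimizer = RouterThresholdOptimizer(router, test_data)
optimizer.optimize()

print(router("i forgot my password").name)  # expected: "faq"
```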
diff --git a/python-recipes/semantic-cache/02_semantic_cache_optimization.ipynb b/python-recipes/semantic-cache/02_semantic_cache_optimization.ipynb index f7933d63..01b12317 100644 --- a/python-recipes/semantic-cache/02_semantic_cache_optimization.ipynb +++ b/python-recipes/semantic-cache/02_semantic_cache_optimization.ipynb @@ -8,7 +8,7 @@ "\n", "# Optimize semantic cache threshold with RedisVL\n", "\n", - "> **Note:** Threshold optimization in redisvl relies on `python > 3.9.`\n", + "> **Note:** Threshold optimization with redis-retrieval-optimizer relies on `python > 3.9.`\n", "\n", "\"Open" ] @@ -26,7 +26,7 @@ "metadata": {}, "outputs": [], "source": [ - "%pip install \"redisvl>=0.6.0\"" + "%pip install \"redisvl>=0.6.0\" \"redis-retrieval-optimizer>=0.2.0\"" ] }, { @@ -80,22 +80,71 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": 2, "metadata": {}, "outputs": [ { - "name": "stderr", + "name": "stdout", + "output_type": "stream", + "text": [ + "13:32:11 [RedisVL] WARNING The default vectorizer has changed from `sentence-transformers/all-mpnet-base-v2` to `redis/langcache-embed-v1` in version 0.6.0 of RedisVL. For more information about this model, please refer to https://arxiv.org/abs/2504.02268 or visit https://huggingface.co/redis/langcache-embed-v1. To continue using the old vectorizer, please specify it explicitly in the constructor as: vectorizer=HFTextVectorizer(model='sentence-transformers/all-mpnet-base-v2')\n", + "13:32:11 sentence_transformers.SentenceTransformer INFO Use pytorch device_name: mps\n", + "13:32:11 sentence_transformers.SentenceTransformer INFO Load pretrained SentenceTransformer: redis/langcache-embed-v1\n" + ] + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "abd298f873404faba441d8be98e2c9de", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "Batches: 0%| | 0/1 [00:00 2\u001b[0m optimizer \u001b[38;5;241m=\u001b[39m \u001b[43mCacheThresholdOptimizer\u001b[49m(sem_cache, test_data, eval_metric\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mprecision\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m 3\u001b[0m optimizer\u001b[38;5;241m.\u001b[39moptimize()\n\u001b[1;32m 4\u001b[0m \u001b[38;5;28mprint\u001b[39m(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mDistance threshold after: \u001b[39m\u001b[38;5;132;01m{\u001b[39;00msem_cache\u001b[38;5;241m.\u001b[39mdistance_threshold\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m \u001b[39m\u001b[38;5;130;01m\\n\u001b[39;00m\u001b[38;5;124m\"\u001b[39m)\n", - "\u001b[0;31mNameError\u001b[0m: name 'CacheThresholdOptimizer' is not defined" + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "38e31141494048c79c809a6e096442ae", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "Batches: 0%| | 0/1 [00:00 Threshold optimizers are scheduled for release with `0.5.0` so we will pull directly from that branch for the time being." 
+ "## Install Packages" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 1, "id": "c620286e", "metadata": {}, "outputs": [ @@ -39,12 +38,15 @@ "name": "stdout", "output_type": "stream", "text": [ + "\n", + "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip is available: \u001b[0m\u001b[31;49m24.0\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m25.1.1\u001b[0m\n", + "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpip install --upgrade pip\u001b[0m\n", "Note: you may need to restart the kernel to use updated packages.\n" ] } ], "source": [ - "%pip install -q sentence-transformers ranx \"redisvl>=0.6.0\"" + "%pip install -q sentence-transformers ranx \"redisvl>=0.6.0\" \"redis-retrieval-optimizer>=0.2.0\"" ] }, { @@ -120,7 +122,7 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": 3, "id": "aefda1d1", "metadata": {}, "outputs": [], @@ -160,7 +162,7 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 4, "id": "60ad280c", "metadata": {}, "outputs": [], @@ -234,10 +236,83 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 5, "id": "e80aaf84", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "13:22:06 datasets INFO PyTorch version 2.3.0 available.\n", + "13:22:06 sentence_transformers.SentenceTransformer INFO Use pytorch device_name: mps\n", + "13:22:06 sentence_transformers.SentenceTransformer INFO Load pretrained SentenceTransformer: sentence-transformers/all-mpnet-base-v2\n" + ] + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "6345d6b8899347ec9c3eac71442f2bd1", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "Batches: 0%| | 0/1 [00:00 str:\n", - " prompt = f\"\"\"\n", - " You are a classification bot. Your job is to classify the following query as either faq, general, blocked, or none. Return only the string label or an empty string if no match.\n", - "\n", - " general is defined as request requiring customer service.\n", - " faq is defined as a request for commonly asked account questions.\n", - " blocked is defined as a request for prohibited information.\n", - "\n", - " query: \"{question}\"\n", - " \"\"\"\n", - " response = client.responses.create(\n", - " model=\"gpt-4o-mini\",\n", - " input=prompt,\n", - " )\n", - " return response" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "feb25546", - "metadata": {}, - "outputs": [], - "source": [ - "with open(\"resources/ecom_test_data.json\", \"r\") as f:\n", - " test_data = json.load(f)\n", - "\n", - "\n", - "res = ask_openai(test_data[0][\"query\"])\n", - "res.output_text" - ] - }, - { - "cell_type": "code", - "execution_count": 37, - "id": "5ee72be1", - "metadata": {}, - "outputs": [ + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "a7825e73ad0647f0a84d5f7f4db318e1", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "Batches: 0%| | 0/1 [00:00 str:\n", + " prompt = f\"\"\"\n", + " You are a classification bot. Your job is to classify the following query as either faq, general, blocked, or none. 
Return only the string label or an empty string if no match.\n", + "\n", + " general is defined as request requiring customer service.\n", + " faq is defined as a request for commonly asked account questions.\n", + " blocked is defined as a request for prohibited information.\n", + "\n", + " query: \"{question}\"\n", + " \"\"\"\n", + " response = client.responses.create(\n", + " model=\"gpt-4o-mini\",\n", + " input=prompt,\n", + " )\n", + " return response" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "feb25546", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "13:23:11 httpx INFO HTTP Request: POST https://api.openai.com/v1/responses \"HTTP/1.1 200 OK\"\n" + ] + }, + { + "data": { + "text/plain": [ + "'faq'" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "with open(\"resources/ecom_test_data.json\", \"r\") as f:\n", + " test_data = json.load(f)\n", + "\n", + "\n", + "res = ask_openai(test_data[0][\"query\"])\n", + "res.output_text" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "id": "5ee72be1", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'input_tokens': 99,\n", + " 'input_tokens_details': {'cached_tokens': 0},\n", + " 'output_tokens': 2,\n", + " 'output_tokens_details': {'reasoning_tokens': 0},\n", + " 'total_tokens': 101}" + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "res.usage.model_dump()" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "id": "e5c921b2", + "metadata": {}, + "outputs": [], + "source": [ + "import time\n", + "\n", + "INPUT_TOKEN_PRICE = (0.15 / 1_000_000)\n", + "OUTPUT_TOKEN_PRICE = (0.60 / 1_000_000)\n", + "\n", + "def calc_cost_rough(openai_response):\n", + " return openai_response.usage.input_tokens * INPUT_TOKEN_PRICE + openai_response.usage.output_tokens * OUTPUT_TOKEN_PRICE\n", + "\n", + "def test_classifier(classifier, test_data, is_router=False):\n", + " correct = 0\n", + " times = []\n", + " costs = []\n", + "\n", + " for data in test_data:\n", + " start = time.time()\n", + " if is_router:\n", + " prediction = classifier(data[\"query\"]).name\n", + " else:\n", + " openai_response = ask_openai(data[\"query\"])\n", + " prediction = openai_response.output_text\n", + " costs.append(calc_cost_rough(openai_response))\n", + " \n", + " if not prediction or prediction.lower() == \"none\":\n", + " prediction = \"\"\n", + "\n", + " times.append(time.time() - start)\n", + " print(f\"Expected | Observed: {data['query_match']} | {prediction.lower()}\")\n", + " if prediction.lower() == data[\"query_match\"]:\n", + " correct += 1\n", + "\n", + " accuracy = correct / len(test_data)\n", + " avg_time = np.mean(times)\n", + " cost = np.sum(costs) if costs else 0\n", + " return accuracy, avg_time, round(cost, 4)" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "id": "5c6024e8", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "13:23:43 httpx INFO HTTP Request: POST https://api.openai.com/v1/responses \"HTTP/1.1 200 OK\"\n", + "Expected | Observed: faq | faq\n", + "13:23:43 httpx INFO HTTP Request: POST https://api.openai.com/v1/responses \"HTTP/1.1 200 OK\"\n", + "Expected | Observed: faq | faq\n", + "13:23:44 httpx INFO HTTP Request: POST https://api.openai.com/v1/responses \"HTTP/1.1 200 OK\"\n", + "Expected | Observed: faq | faq\n", + "13:23:44 httpx INFO 
HTTP Request: POST https://api.openai.com/v1/responses \"HTTP/1.1 200 OK\"\n", + "Expected | Observed: faq | faq\n", + "13:23:45 httpx INFO HTTP Request: POST https://api.openai.com/v1/responses \"HTTP/1.1 200 OK\"\n", + "Expected | Observed: faq | general\n", + "13:23:45 httpx INFO HTTP Request: POST https://api.openai.com/v1/responses \"HTTP/1.1 200 OK\"\n", + "Expected | Observed: faq | faq\n", + "13:23:46 httpx INFO HTTP Request: POST https://api.openai.com/v1/responses \"HTTP/1.1 200 OK\"\n", + "Expected | Observed: general | general\n", + "13:23:46 httpx INFO HTTP Request: POST https://api.openai.com/v1/responses \"HTTP/1.1 200 OK\"\n", + "Expected | Observed: general | general\n", + "13:23:47 httpx INFO HTTP Request: POST https://api.openai.com/v1/responses \"HTTP/1.1 200 OK\"\n", + "Expected | Observed: general | general\n", + "13:23:47 httpx INFO HTTP Request: POST https://api.openai.com/v1/responses \"HTTP/1.1 200 OK\"\n", + "Expected | Observed: general | general\n", + "13:23:48 httpx INFO HTTP Request: POST https://api.openai.com/v1/responses \"HTTP/1.1 200 OK\"\n", + "Expected | Observed: general | general\n", + "13:23:48 httpx INFO HTTP Request: POST https://api.openai.com/v1/responses \"HTTP/1.1 200 OK\"\n", + "Expected | Observed: general | general\n", + "13:23:49 httpx INFO HTTP Request: POST https://api.openai.com/v1/responses \"HTTP/1.1 200 OK\"\n", "Expected | Observed: blocked | \n", + "13:23:49 httpx INFO HTTP Request: POST https://api.openai.com/v1/responses \"HTTP/1.1 200 OK\"\n", + "Expected | Observed: blocked | blocked\n", + "13:23:50 httpx INFO HTTP Request: POST https://api.openai.com/v1/responses \"HTTP/1.1 200 OK\"\n", "Expected | Observed: blocked | blocked\n", + "13:23:50 httpx INFO HTTP Request: POST https://api.openai.com/v1/responses \"HTTP/1.1 200 OK\"\n", + "Expected | Observed: blocked | general\n", + "13:23:51 httpx INFO HTTP Request: POST https://api.openai.com/v1/responses \"HTTP/1.1 200 OK\"\n", "Expected | Observed: blocked | blocked\n", + "13:23:52 httpx INFO HTTP Request: POST https://api.openai.com/v1/responses \"HTTP/1.1 200 OK\"\n", "Expected | Observed: blocked | blocked\n", + "13:23:52 httpx INFO HTTP Request: POST https://api.openai.com/v1/responses \"HTTP/1.1 200 OK\"\n", + "Expected | Observed: blocked | \n", + "13:23:53 httpx INFO HTTP Request: POST https://api.openai.com/v1/responses \"HTTP/1.1 200 OK\"\n", + "Expected | Observed: blocked | blocked\n" + ] + } + ], + "source": [ + "llm_accuracy, llm_avg_time, llm_cost = test_classifier(ask_openai, test_data)" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "id": "c3362a1b", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(0.8, 0.5609435558319091, 0.0003)" + ] + }, + "execution_count": 15, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "llm_accuracy, llm_avg_time, llm_cost" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "id": "40ddc05d", + "metadata": {}, + "outputs": [ + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "65740a8a0b094a68aea0d31fd3c6d87a", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "Batches: 0%| | 0/1 [00:00
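The patch truncates while the router evaluation cells are still streaming their progress widgets. Given the `test_classifier` helper defined earlier, the head-to-head comparison would plausibly conclude along these lines; only the LLM baseline figures (0.8 accuracy, ~0.56 s average latency, $0.0003 total cost) appear in the patch, and `router` is assumed to be the optimized `SemanticRouter` built in the preceding cells:

```python
# Run the semantic router through the same harness used for the LLM baseline.
# `test_classifier`, `test_data`, `llm_accuracy`, `llm_avg_time`, and
# `llm_cost` all come from the cells above; `router` is assumed.
router_accuracy, router_avg_time, router_cost = test_classifier(
    router, test_data, is_router=True
)

# The router path never calls OpenAI, so its measured cost is 0.
print(f"LLM    -> accuracy: {llm_accuracy:.2f} | avg time: {llm_avg_time:.3f}s | cost: ${llm_cost}")
print(f"Router -> accuracy: {router_accuracy:.2f} | avg time: {router_avg_time:.3f}s | cost: ${router_cost}")
```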