From c11c1e17d387b7c27c6add3aa1a99c821aaf4805 Mon Sep 17 00:00:00 2001
From: Justin Cechmanek <justin.cechmanek@redis.com>
Date: Wed, 9 Oct 2024 16:46:41 -0700
Subject: [PATCH 01/12] wip: first pass at working collaborative filtering in
 redisvl

---
 .../collaborative_filtering.ipynb             | 1095 +++++++++++++++++
 .../collaborative_filtering_schema.yaml       |   40 +
 .../recommendation-systems/user_schema.yaml   |   18 +
 3 files changed, 1153 insertions(+)
 create mode 100644 python-recipes/recommendation-systems/collaborative_filtering.ipynb
 create mode 100644 python-recipes/recommendation-systems/collaborative_filtering_schema.yaml
 create mode 100644 python-recipes/recommendation-systems/user_schema.yaml

diff --git a/python-recipes/recommendation-systems/collaborative_filtering.ipynb b/python-recipes/recommendation-systems/collaborative_filtering.ipynb
new file mode 100644
index 00000000..26a4de60
--- /dev/null
+++ b/python-recipes/recommendation-systems/collaborative_filtering.ipynb
@@ -0,0 +1,1095 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Collaborative Filtering in RedisVL"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Recommendation systems are a common application of machine learning and serve many industries from e-commerce to music streaming platforms.\n",
+    "\n",
+    "There are many different architechtures that can be followed to build a recommender system. \n",
+    "\n",
+    "In this notebook we'll demonstrate how to build a [content filtering](https://en.wikipedia.org/wiki/Recommender_system#:~:text=of%20hybrid%20systems.-,Content%2Dbased%20filtering,-%5Bedit%5D)\n",
+    "recommender and use the movies dataset as our example data."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "%pip install scikit-surprise --quiet"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "## IMPORTS\n",
+    "import os\n",
+    "import requests\n",
+    "import pandas as pd\n",
+    "import numpy as np\n",
+    "\n",
+    "from surprise import SVD\n",
+    "from surprise import Dataset, Reader\n",
+    "from surprise.model_selection import train_test_split\n",
+    "\n",
+    "\n",
+    "# Replace values below with your own if using Redis Cloud instance\n",
+    "REDIS_HOST = os.getenv(\"REDIS_HOST\", \"localhost\") # ex: \"redis-18374.c253.us-central1-1.gce.cloud.redislabs.com\"\n",
+    "REDIS_PORT = os.getenv(\"REDIS_PORT\", \"6379\")      # ex: 18374\n",
+    "REDIS_PASSWORD = os.getenv(\"REDIS_PASSWORD\", \"\")  # ex: \"1TNxTEdYRDgIDKM2gDfasupCADXXXX\"\n",
+    "\n",
+    "# If SSL is enabled on the endpoint, use rediss:// as the URL prefix\n",
+    "REDIS_URL = f\"redis://:{REDIS_PASSWORD}@{REDIS_HOST}:{REDIS_PORT}\""
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "\n",
+    "## EVALUATE MOVE TO COLLABORATIVE FILTERING SO WE CAN SHOW BETTER NUMBERS\n",
+    "#let's see how well this works. we can choose some users, and based on their first watched movie we can recommend them some more.\n",
+    "#we can then look at the set intersection between our recommendations and the movies they actually watched (and rated highly) to see how well we did."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "## DONE\n",
+    "# clean up your index\n",
+    "\n",
+    "#while remaining := index.clear():\n",
+    "#    print(f\"Deleted {remaining} keys\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# YOLO FTW"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "To build a collaborative filtering example using the Surprise library and the Movies dataset, we need to first load the data, format it according to the requirements of Surprise, and then apply a collaborative filtering algorithm like SVD.\n",
+    "\n",
+    "Since you mentioned a modified version of the dataset hosted on Kaggle, I’ll show you how to structure the code, assuming you have the dataset ready.\n",
+    "\n",
+    "Here’s an example:\n",
+    "\n",
+    "Step-by-Step Guide\n",
+    "Install necessary libraries: Ensure you have installed the Surprise library if you haven’t already."
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Loading and Preparing the Data: Let’s assume the dataset contains at least two relevant files: ratings.csv (user, movie, rating) and movies.csv (movieId, title).\n",
+    "\n",
+    "You’ll need to load the ratings data and prepare it for use with Surprise."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def fetch_dataframe(file_name):\n",
+    "    \"\"\"Return the dataset CSV `file_name` as a DataFrame, downloading it into datasets/collaborative_filtering on first use; raises requests.HTTPError if the download fails.\"\"\"\n",
+    "    local_dir = 'datasets/collaborative_filtering'\n",
+    "    local_path = os.path.join(local_dir, file_name)\n",
+    "    if not os.path.exists(local_path):  # only hit the network when there is no cached copy\n",
+    "        url = 'https://redis-ai-resources.s3.us-east-2.amazonaws.com/recommenders/datasets/collaborative-filtering/'\n",
+    "        response = requests.get(url + file_name, timeout=60)\n",
+    "        response.raise_for_status()  # fail loudly instead of caching an HTTP error page as the CSV\n",
+    "        os.makedirs(local_dir, exist_ok=True)\n",
+    "        with open(local_path, 'wb') as f:\n",
+    "            f.write(response.content)\n",
+    "    return pd.read_csv(local_path)\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "ratings_file = 'ratings_small.csv'\n",
+    "\n",
+    "ratings_df = fetch_dataframe(ratings_file)\n",
+    "\n",
+    "# only keep the columns we need: userId, movieId, rating\n",
+    "ratings_df = ratings_df[['userId', 'movieId', 'rating']]\n",
+    "\n",
+    "reader = Reader(rating_scale=(0.0, 5.0))\n",
+    "\n",
+    "ratings_data = Dataset.load_from_df(ratings_df, reader)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Training Our Model"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "<surprise.prediction_algorithms.matrix_factorization.SVD at 0x13a767e50>"
+      ]
+     },
+     "execution_count": 7,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# split the data into training and testing sets (80% train, 20% test)\n",
+    "train_set, test_set = train_test_split(ratings_data, test_size=0.2)\n",
+    "\n",
+    "# use SVD (Singular Value Decomposition) for collaborative filtering\n",
+    "svd_algo = SVD(biased=False)  # We'll set biased to False so that predictions are of the form \"rating_prediction = user_vector dot item_vector\"\n",
+    "\n",
+    "# train the algorithm on the train_set\n",
+    "svd_algo.fit(train_set)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "A lot happened in the cell above. We split our full data into train and test sets. We defined the collaborative filtering algorithm to use, which in this case is the Singular Value Decomposition (SVD) algorithm. lastly, we fit our model to our data.\n",
+    "\n",
+    "It's worth going into more detail why we chose this algorithm and what it is computing in the `.fit(train_set)` method we're calling.\n",
+    "First, let's think about what data it's receiving - our ratings data. This only contains the user_ids, movie_ids, and the user's ratings of their watched movies on a scale of 1 to 5.\n",
+    "\n",
+    "We can put this data into a matrix with rows being users and columns being movies\n",
+    "\n",
+    "| RATINGS| movie_1 | movie_2 | movie_3 | movie_4 | movie_5 | movie_6 | ....... |\n",
+    "| -----  | :-----: | :-----: | :-----: | :-----: | :-----: | :-----: | :-----: |\n",
+    "| user_1 |    4    |    1    |         |    4    |         |    5    |         |\n",
+    "| user_2 |         |    5    |    5    |    2    |    1    |         |         |\n",
+    "| user_3 |         |         |         |         |    1    |         |         |\n",
+    "| user_4 |    4    |    1    |         |    4    |         |    ?    |         |\n",
+    "| user_5 |         |    4    |    5    |    2    |         |         |         |\n",
+    "| ...... |         |         |         |         |         |         |         |\n",
+    "\n",
+    "Our empty cells aren't zero's their missing ratings, so `user_1` has never rated `movie_3`. They may like it or hate it."
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Unlike Content Filtering, here we're only considering the ratings that users assign. We don't know the plot or genre or release year of any of these films.\n",
+    "But we can still build recommender by assuming that users have similar tastes to each other. As an intuitive example, we can see that `user_1` and `user_4` have very similar ratings on several movies, so we can assume that `user_4` will rate `movie_6` highly, just as `user_1` did. This is the idea behind collaborative filtering."
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "That's the idea, but what about the math? Since we only have this matrix to work with what we want to do is decompose it into two constituent matrices.\n",
+    "Lets call our ratings matrix `[R]`. We want to find two other matrices, a user matrix `[U]`, and a movies matrix `[M]` that fit the equation:\n",
+    "\n",
+    "`[U] * [M] = [R]`\n",
+    "\n",
+    "`[U]` will look like:\n",
+    "|user_1_feature_1 | user_1_feature_2 | user_1_feature_3 | user_1_feature_4 | ... | user_1_feature_k |\n",
+    "| ----- | --------- | --------- | --------- | --- | --------- |\n",
+    "|user_2_feature_1 | user_2_feature_2 | user_2_feature_3 | user_2_feature_4 | ... | user_2_feature_k |\n",
+    "|user_3_feature_1 | user_3_feature_2 | user_3_feature_3 | user_3_feature_4 | ... | user_3_feature_k |\n",
+    "|  ...  | . | . | . | ... | . |\n",
+    "|user_N_feature_1 | user_N_feature_2 | user_N_feature_3 | user_N_feature_4 | ... | user_N_feature_k |\n",
+    "\n",
+    "`[M]` will look like:\n",
+    "\n",
+    "| movie_1_feature_1 | movie_2_feature_1 | movie_3_feature_1 | ... | movie_M_feature_1 |\n",
+    "| --- | --- | --- | --- | --- |\n",
+    "| movie_1_feature_2 | movie_2_feature_2 | movie_3_feature_2 | ... | movie_M_feature_1 |\n",
+    "| movie_1_feature_3 | movie_2_feature_3 | movie_3_feature_3 | ... | movie_M_feature_1 |\n",
+    "| movie_1_feature_4 | movie_2_feature_4 | movie_3_feature_4 | ... | movie_M_feature_1 |\n",
+    "|  ...  | . | . | ... | . |\n",
+    "| movie_1_feature_k | movie_2_feature_k | movie_3_feature_k | ... | movie_M_feature_k |\n",
+    "\n",
+    "\n",
+    "these features are called the latent features and are the values we're trying to find when we call the `.fit(training_data)` method. The algorithm that computes these features from our ratings matrix is the SVD algorithm."
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Explanation:\n",
+    "Dataset Preparation: We load the ratings data and ensure it has the necessary format with userId, movieId, and rating columns.\n",
+    "Surprise Reader: This helps in specifying the format of the data.\n",
+    "SVD Algorithm: We use the SVD algorithm for collaborative filtering. It decomposes the user-item interaction matrix into the latent factors.\n",
+    "Accuracy: After training the model, we evaluate it using the RMSE (Root Mean Squared Error).\n",
+    "Next Steps:\n",
+    "You can experiment with different algorithms such as KNNBasic or NMF in the Surprise library.\n",
+    "If your dataset contains titles, you can join movies.csv to display movie names in recommendations.\n",
+    "Would you like more details on dataset preprocessing or any specific functionality in collaborative filtering?"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "To extract user and item (movie) vectors from an SVD model trained using Surprise and then store them in a Redis vector database, you'll need to:\n",
+    "\n",
+    "Extract the learned latent factors (user and item vectors) from the SVD model.\n",
+    "Use Redis-py (Python client for Redis) to store those vectors in Redis as vectors in a vector database (assuming you are using Redis with the RedisVector or RedisSearch modules).\n",
+    "Here’s how to do it:\n",
+    "\n",
+    "Step 1: Extract User and Item Vectors from the SVD Model\n",
+    "The Surprise SVD model stores user and item vectors (latent factors) in two attributes:\n",
+    "\n",
+    "algo.pu: user factors matrix (a matrix where each row corresponds to the latent factors of a user).\n",
+    "algo.qi: item factors matrix (a matrix where each row corresponds to the latent factors of an item/movie).\n",
+    "These matrices store the vectors in the latent space after training.\n",
+    "\n",
+    "Step 2: Save the Vectors in Redis\n",
+    "Redis stores vectors in vector databases, such as Redis' HNSW index for vector similarity search. You can store both user and movie vectors as hashes in Redis and then use them for similarity search or recommendations.\n",
+    "\n",
+    "Install Redis and Redis-py\n",
+    "Make sure you have Redis installed with vector support (RediSearch or RedisVL), and install the Redis-py package:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 8,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "(671, 100)\n",
+      "(8405, 100)\n"
+     ]
+    }
+   ],
+   "source": [
+    "# step 1: extract vectors\n",
+    "user_vectors = svd_algo.pu  # user latent features (matrix)\n",
+    "movie_vectors = svd_algo.qi  # movie latent features (matrix)\n",
+    "\n",
+    "print(user_vectors.shape)\n",
+    "print(movie_vectors.shape)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Explanation:\n",
+    "Extract Vectors:\n",
+    "\n",
+    "algo.pu gives you a matrix where each row corresponds to a user’s latent factors (user vector).\n",
+    "algo.qi gives you a matrix where each row corresponds to an item/movie’s latent factors (item vector).\n",
+    "Store in Redis:\n",
+    "\n",
+    "We store each vector under a unique Redis key (e.g., user:123, item:456).\n",
+    "The vector is stored as a hash in Redis with each dimension (dim_0, dim_1, etc.) being a field in the hash.\n",
+    "Step 3: Advanced Storage for Vector Similarity Search\n",
+    "If you want to store the vectors in a Redis vector search index (e.g., HNSW from RedisSearch for vector similarity queries), you would follow the Redis commands for indexing:\n",
+    "\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 9,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "user: 347        item: 5515       r_ui = None   est = 1.42   {'was_impossible': False}\n",
+      "1.4150893670982523\n"
+     ]
+    },
+    {
+     "data": {
+      "text/plain": [
+       "Prediction(uid=347, iid=5515, r_ui=None, est=1.4150893670982523, details={'was_impossible': False})"
+      ]
+     },
+     "execution_count": 9,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "print(svd_algo.predict(347, 5515))\n",
+    "\n",
+    "inner_uid = train_set.to_inner_uid(347)\n",
+    "inner_iid = train_set.to_inner_iid(5515)\n",
+    "print(np.dot(user_vectors[inner_uid], movie_vectors[inner_iid])) # surpirse casts userId and movieId to inner ids\n",
+    "svd_algo.predict(347, 5515)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "while our collaborative filtering algorithm was trained solely on user's ratings of movies, and doesn't require any data about the movies themselves - like the title, genres, or release year - we'll want that information stored as metadata.\n",
+    "\n",
+    "We can grab this data from our `movies_metadata.csv` file, clean it, and join it to our user ratings via the `movieId` column"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 10,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>belongs_to_collection</th>\n",
+       "      <th>budget</th>\n",
+       "      <th>genres</th>\n",
+       "      <th>homepage</th>\n",
+       "      <th>id</th>\n",
+       "      <th>imdb_id</th>\n",
+       "      <th>original_language</th>\n",
+       "      <th>original_title</th>\n",
+       "      <th>overview</th>\n",
+       "      <th>popularity</th>\n",
+       "      <th>...</th>\n",
+       "      <th>release_date</th>\n",
+       "      <th>revenue</th>\n",
+       "      <th>runtime</th>\n",
+       "      <th>spoken_languages</th>\n",
+       "      <th>status</th>\n",
+       "      <th>tagline</th>\n",
+       "      <th>title</th>\n",
+       "      <th>video</th>\n",
+       "      <th>vote_average</th>\n",
+       "      <th>vote_count</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>{'id': 10194, 'name': 'Toy Story Collection', ...</td>\n",
+       "      <td>30000000</td>\n",
+       "      <td>[{'id': 16, 'name': 'Animation'}, {'id': 35, '...</td>\n",
+       "      <td>http://toystory.disney.com/toy-story</td>\n",
+       "      <td>862</td>\n",
+       "      <td>tt0114709</td>\n",
+       "      <td>en</td>\n",
+       "      <td>Toy Story</td>\n",
+       "      <td>Led by Woody, Andy's toys live happily in his ...</td>\n",
+       "      <td>21.946943</td>\n",
+       "      <td>...</td>\n",
+       "      <td>1995-10-30</td>\n",
+       "      <td>373554033</td>\n",
+       "      <td>81.0</td>\n",
+       "      <td>[{'iso_639_1': 'en', 'name': 'English'}]</td>\n",
+       "      <td>Released</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>Toy Story</td>\n",
+       "      <td>False</td>\n",
+       "      <td>7.7</td>\n",
+       "      <td>5415</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>NaN</td>\n",
+       "      <td>65000000</td>\n",
+       "      <td>[{'id': 12, 'name': 'Adventure'}, {'id': 14, '...</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>8844</td>\n",
+       "      <td>tt0113497</td>\n",
+       "      <td>en</td>\n",
+       "      <td>Jumanji</td>\n",
+       "      <td>When siblings Judy and Peter discover an encha...</td>\n",
+       "      <td>17.015539</td>\n",
+       "      <td>...</td>\n",
+       "      <td>1995-12-15</td>\n",
+       "      <td>262797249</td>\n",
+       "      <td>104.0</td>\n",
+       "      <td>[{'iso_639_1': 'en', 'name': 'English'}, {'iso...</td>\n",
+       "      <td>Released</td>\n",
+       "      <td>Roll the dice and unleash the excitement!</td>\n",
+       "      <td>Jumanji</td>\n",
+       "      <td>False</td>\n",
+       "      <td>6.9</td>\n",
+       "      <td>2413</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>{'id': 119050, 'name': 'Grumpy Old Men Collect...</td>\n",
+       "      <td>0</td>\n",
+       "      <td>[{'id': 10749, 'name': 'Romance'}, {'id': 35, ...</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>15602</td>\n",
+       "      <td>tt0113228</td>\n",
+       "      <td>en</td>\n",
+       "      <td>Grumpier Old Men</td>\n",
+       "      <td>A family wedding reignites the ancient feud be...</td>\n",
+       "      <td>11.712900</td>\n",
+       "      <td>...</td>\n",
+       "      <td>1995-12-22</td>\n",
+       "      <td>0</td>\n",
+       "      <td>101.0</td>\n",
+       "      <td>[{'iso_639_1': 'en', 'name': 'English'}]</td>\n",
+       "      <td>Released</td>\n",
+       "      <td>Still Yelling. Still Fighting. Still Ready for...</td>\n",
+       "      <td>Grumpier Old Men</td>\n",
+       "      <td>False</td>\n",
+       "      <td>6.5</td>\n",
+       "      <td>92</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>NaN</td>\n",
+       "      <td>16000000</td>\n",
+       "      <td>[{'id': 35, 'name': 'Comedy'}, {'id': 18, 'nam...</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>31357</td>\n",
+       "      <td>tt0114885</td>\n",
+       "      <td>en</td>\n",
+       "      <td>Waiting to Exhale</td>\n",
+       "      <td>Cheated on, mistreated and stepped on, the wom...</td>\n",
+       "      <td>3.859495</td>\n",
+       "      <td>...</td>\n",
+       "      <td>1995-12-22</td>\n",
+       "      <td>81452156</td>\n",
+       "      <td>127.0</td>\n",
+       "      <td>[{'iso_639_1': 'en', 'name': 'English'}]</td>\n",
+       "      <td>Released</td>\n",
+       "      <td>Friends are the people who let you be yourself...</td>\n",
+       "      <td>Waiting to Exhale</td>\n",
+       "      <td>False</td>\n",
+       "      <td>6.1</td>\n",
+       "      <td>34</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4</th>\n",
+       "      <td>{'id': 96871, 'name': 'Father of the Bride Col...</td>\n",
+       "      <td>0</td>\n",
+       "      <td>[{'id': 35, 'name': 'Comedy'}]</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>11862</td>\n",
+       "      <td>tt0113041</td>\n",
+       "      <td>en</td>\n",
+       "      <td>Father of the Bride Part II</td>\n",
+       "      <td>Just when George Banks has recovered from his ...</td>\n",
+       "      <td>8.387519</td>\n",
+       "      <td>...</td>\n",
+       "      <td>1995-02-10</td>\n",
+       "      <td>76578911</td>\n",
+       "      <td>106.0</td>\n",
+       "      <td>[{'iso_639_1': 'en', 'name': 'English'}]</td>\n",
+       "      <td>Released</td>\n",
+       "      <td>Just When His World Is Back To Normal... He's ...</td>\n",
+       "      <td>Father of the Bride Part II</td>\n",
+       "      <td>False</td>\n",
+       "      <td>5.7</td>\n",
+       "      <td>173</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "<p>5 rows × 23 columns</p>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "                               belongs_to_collection    budget  \\\n",
+       "0  {'id': 10194, 'name': 'Toy Story Collection', ...  30000000   \n",
+       "1                                                NaN  65000000   \n",
+       "2  {'id': 119050, 'name': 'Grumpy Old Men Collect...         0   \n",
+       "3                                                NaN  16000000   \n",
+       "4  {'id': 96871, 'name': 'Father of the Bride Col...         0   \n",
+       "\n",
+       "                                              genres  \\\n",
+       "0  [{'id': 16, 'name': 'Animation'}, {'id': 35, '...   \n",
+       "1  [{'id': 12, 'name': 'Adventure'}, {'id': 14, '...   \n",
+       "2  [{'id': 10749, 'name': 'Romance'}, {'id': 35, ...   \n",
+       "3  [{'id': 35, 'name': 'Comedy'}, {'id': 18, 'nam...   \n",
+       "4                     [{'id': 35, 'name': 'Comedy'}]   \n",
+       "\n",
+       "                               homepage     id    imdb_id original_language  \\\n",
+       "0  http://toystory.disney.com/toy-story    862  tt0114709                en   \n",
+       "1                                   NaN   8844  tt0113497                en   \n",
+       "2                                   NaN  15602  tt0113228                en   \n",
+       "3                                   NaN  31357  tt0114885                en   \n",
+       "4                                   NaN  11862  tt0113041                en   \n",
+       "\n",
+       "                original_title  \\\n",
+       "0                    Toy Story   \n",
+       "1                      Jumanji   \n",
+       "2             Grumpier Old Men   \n",
+       "3            Waiting to Exhale   \n",
+       "4  Father of the Bride Part II   \n",
+       "\n",
+       "                                            overview  popularity  ...  \\\n",
+       "0  Led by Woody, Andy's toys live happily in his ...   21.946943  ...   \n",
+       "1  When siblings Judy and Peter discover an encha...   17.015539  ...   \n",
+       "2  A family wedding reignites the ancient feud be...   11.712900  ...   \n",
+       "3  Cheated on, mistreated and stepped on, the wom...    3.859495  ...   \n",
+       "4  Just when George Banks has recovered from his ...    8.387519  ...   \n",
+       "\n",
+       "  release_date    revenue runtime  \\\n",
+       "0   1995-10-30  373554033    81.0   \n",
+       "1   1995-12-15  262797249   104.0   \n",
+       "2   1995-12-22          0   101.0   \n",
+       "3   1995-12-22   81452156   127.0   \n",
+       "4   1995-02-10   76578911   106.0   \n",
+       "\n",
+       "                                    spoken_languages    status  \\\n",
+       "0           [{'iso_639_1': 'en', 'name': 'English'}]  Released   \n",
+       "1  [{'iso_639_1': 'en', 'name': 'English'}, {'iso...  Released   \n",
+       "2           [{'iso_639_1': 'en', 'name': 'English'}]  Released   \n",
+       "3           [{'iso_639_1': 'en', 'name': 'English'}]  Released   \n",
+       "4           [{'iso_639_1': 'en', 'name': 'English'}]  Released   \n",
+       "\n",
+       "                                             tagline  \\\n",
+       "0                                                NaN   \n",
+       "1          Roll the dice and unleash the excitement!   \n",
+       "2  Still Yelling. Still Fighting. Still Ready for...   \n",
+       "3  Friends are the people who let you be yourself...   \n",
+       "4  Just When His World Is Back To Normal... He's ...   \n",
+       "\n",
+       "                         title  video vote_average vote_count  \n",
+       "0                    Toy Story  False          7.7       5415  \n",
+       "1                      Jumanji  False          6.9       2413  \n",
+       "2             Grumpier Old Men  False          6.5         92  \n",
+       "3            Waiting to Exhale  False          6.1         34  \n",
+       "4  Father of the Bride Part II  False          5.7        173  \n",
+       "\n",
+       "[5 rows x 23 columns]"
+      ]
+     },
+     "execution_count": 10,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "movies_df = fetch_dataframe('movies_metadata.csv')\n",
+    "movies_df.head()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 11,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "budget               0\n",
+       "genres               0\n",
+       "id                   0\n",
+       "imdb_id              0\n",
+       "original_language    0\n",
+       "overview             0\n",
+       "popularity           0\n",
+       "release_date         0\n",
+       "revenue              0\n",
+       "runtime              0\n",
+       "status               0\n",
+       "tagline              0\n",
+       "title                0\n",
+       "vote_average         0\n",
+       "vote_count           0\n",
+       "dtype: int64"
+      ]
+     },
+     "execution_count": 11,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "import ast\n",
+    "import datetime\n",
+    "movies_df.drop(columns=['homepage', 'production_countries', 'production_companies', 'spoken_languages', 'video', 'original_title', 'poster_path', 'belongs_to_collection'], inplace=True)\n",
+    "\n",
+    "# drop rows that have no imdb_id; every other missing value is filled with a default below\n",
+    "movies_df.dropna(subset=['imdb_id'], inplace=True)\n",
+    "\n",
+    "movies_df['original_language'] = movies_df['original_language'].fillna('unknown')\n",
+    "movies_df['overview'] = movies_df['overview'].fillna('')\n",
+    "movies_df['popularity'] = movies_df['popularity'].fillna(0)\n",
+    "movies_df['release_date'] = movies_df['release_date'].fillna('1900-01-01').apply(lambda x: datetime.datetime.strptime(x, \"%Y-%m-%d\").replace(tzinfo=datetime.timezone.utc).timestamp()) # UTC epoch seconds, independent of the machine's timezone\n",
+    "movies_df['revenue'] = movies_df['revenue'].fillna(0) # fill with average?\n",
+    "movies_df['runtime'] = movies_df['runtime'].fillna(0) # fill with average?\n",
+    "movies_df['status'] = movies_df['status'].fillna('unknown')\n",
+    "movies_df['tagline'] = movies_df['tagline'].fillna('')\n",
+    "movies_df['title'] = movies_df['title'].fillna('')\n",
+    "movies_df['vote_average'] = movies_df['vote_average'].fillna(0)\n",
+    "movies_df['vote_count'] = movies_df['vote_count'].fillna(0)\n",
+    "movies_df['genres'] = movies_df['genres'].apply(lambda x: [g['name'] for g in ast.literal_eval(x)] if x != '' else []) # parse the literal safely (no eval on file contents) into a list of genre names\n",
+    "movies_df['imdb_id'] = movies_df['imdb_id'].apply(lambda x: x[2:] if str(x).startswith('tt') else x).astype(int) # remove leading 'tt' from imdb_id\n",
+    "\n",
+    "# make sure we've filled all missing values\n",
+    "movies_df.isnull().sum()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "We'll eventually have to map these movies to their ratings, which we'll do so with the `links.csv` file that matches `movieId`, `imdbId`, and `tmdbId`.\n",
+    "\n",
+    "Let's do that now."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 12,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>budget</th>\n",
+       "      <th>genres</th>\n",
+       "      <th>id</th>\n",
+       "      <th>imdb_id</th>\n",
+       "      <th>original_language</th>\n",
+       "      <th>overview</th>\n",
+       "      <th>popularity</th>\n",
+       "      <th>release_date</th>\n",
+       "      <th>revenue</th>\n",
+       "      <th>runtime</th>\n",
+       "      <th>status</th>\n",
+       "      <th>tagline</th>\n",
+       "      <th>title</th>\n",
+       "      <th>vote_average</th>\n",
+       "      <th>vote_count</th>\n",
+       "      <th>movieId</th>\n",
+       "      <th>imdbId</th>\n",
+       "      <th>tmdbId</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>30000000</td>\n",
+       "      <td>[Animation, Comedy, Family]</td>\n",
+       "      <td>862</td>\n",
+       "      <td>114709</td>\n",
+       "      <td>en</td>\n",
+       "      <td>Led by Woody, Andy's toys live happily in his ...</td>\n",
+       "      <td>21.946943</td>\n",
+       "      <td>815040000.0</td>\n",
+       "      <td>373554033</td>\n",
+       "      <td>81.0</td>\n",
+       "      <td>Released</td>\n",
+       "      <td></td>\n",
+       "      <td>Toy Story</td>\n",
+       "      <td>7.7</td>\n",
+       "      <td>5415</td>\n",
+       "      <td>1</td>\n",
+       "      <td>114709</td>\n",
+       "      <td>862.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>65000000</td>\n",
+       "      <td>[Adventure, Fantasy, Family]</td>\n",
+       "      <td>8844</td>\n",
+       "      <td>113497</td>\n",
+       "      <td>en</td>\n",
+       "      <td>When siblings Judy and Peter discover an encha...</td>\n",
+       "      <td>17.015539</td>\n",
+       "      <td>819014400.0</td>\n",
+       "      <td>262797249</td>\n",
+       "      <td>104.0</td>\n",
+       "      <td>Released</td>\n",
+       "      <td>Roll the dice and unleash the excitement!</td>\n",
+       "      <td>Jumanji</td>\n",
+       "      <td>6.9</td>\n",
+       "      <td>2413</td>\n",
+       "      <td>2</td>\n",
+       "      <td>113497</td>\n",
+       "      <td>8844.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>0</td>\n",
+       "      <td>[Romance, Comedy]</td>\n",
+       "      <td>15602</td>\n",
+       "      <td>113228</td>\n",
+       "      <td>en</td>\n",
+       "      <td>A family wedding reignites the ancient feud be...</td>\n",
+       "      <td>11.712900</td>\n",
+       "      <td>819619200.0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>101.0</td>\n",
+       "      <td>Released</td>\n",
+       "      <td>Still Yelling. Still Fighting. Still Ready for...</td>\n",
+       "      <td>Grumpier Old Men</td>\n",
+       "      <td>6.5</td>\n",
+       "      <td>92</td>\n",
+       "      <td>3</td>\n",
+       "      <td>113228</td>\n",
+       "      <td>15602.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>16000000</td>\n",
+       "      <td>[Comedy, Drama, Romance]</td>\n",
+       "      <td>31357</td>\n",
+       "      <td>114885</td>\n",
+       "      <td>en</td>\n",
+       "      <td>Cheated on, mistreated and stepped on, the wom...</td>\n",
+       "      <td>3.859495</td>\n",
+       "      <td>819619200.0</td>\n",
+       "      <td>81452156</td>\n",
+       "      <td>127.0</td>\n",
+       "      <td>Released</td>\n",
+       "      <td>Friends are the people who let you be yourself...</td>\n",
+       "      <td>Waiting to Exhale</td>\n",
+       "      <td>6.1</td>\n",
+       "      <td>34</td>\n",
+       "      <td>4</td>\n",
+       "      <td>114885</td>\n",
+       "      <td>31357.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4</th>\n",
+       "      <td>0</td>\n",
+       "      <td>[Comedy]</td>\n",
+       "      <td>11862</td>\n",
+       "      <td>113041</td>\n",
+       "      <td>en</td>\n",
+       "      <td>Just when George Banks has recovered from his ...</td>\n",
+       "      <td>8.387519</td>\n",
+       "      <td>792403200.0</td>\n",
+       "      <td>76578911</td>\n",
+       "      <td>106.0</td>\n",
+       "      <td>Released</td>\n",
+       "      <td>Just When His World Is Back To Normal... He's ...</td>\n",
+       "      <td>Father of the Bride Part II</td>\n",
+       "      <td>5.7</td>\n",
+       "      <td>173</td>\n",
+       "      <td>5</td>\n",
+       "      <td>113041</td>\n",
+       "      <td>11862.0</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "     budget                        genres     id  imdb_id original_language  \\\n",
+       "0  30000000   [Animation, Comedy, Family]    862   114709                en   \n",
+       "1  65000000  [Adventure, Fantasy, Family]   8844   113497                en   \n",
+       "2         0             [Romance, Comedy]  15602   113228                en   \n",
+       "3  16000000      [Comedy, Drama, Romance]  31357   114885                en   \n",
+       "4         0                      [Comedy]  11862   113041                en   \n",
+       "\n",
+       "                                            overview  popularity  \\\n",
+       "0  Led by Woody, Andy's toys live happily in his ...   21.946943   \n",
+       "1  When siblings Judy and Peter discover an encha...   17.015539   \n",
+       "2  A family wedding reignites the ancient feud be...   11.712900   \n",
+       "3  Cheated on, mistreated and stepped on, the wom...    3.859495   \n",
+       "4  Just when George Banks has recovered from his ...    8.387519   \n",
+       "\n",
+       "   release_date    revenue  runtime    status  \\\n",
+       "0   815040000.0  373554033     81.0  Released   \n",
+       "1   819014400.0  262797249    104.0  Released   \n",
+       "2   819619200.0          0    101.0  Released   \n",
+       "3   819619200.0   81452156    127.0  Released   \n",
+       "4   792403200.0   76578911    106.0  Released   \n",
+       "\n",
+       "                                             tagline  \\\n",
+       "0                                                      \n",
+       "1          Roll the dice and unleash the excitement!   \n",
+       "2  Still Yelling. Still Fighting. Still Ready for...   \n",
+       "3  Friends are the people who let you be yourself...   \n",
+       "4  Just When His World Is Back To Normal... He's ...   \n",
+       "\n",
+       "                         title  vote_average  vote_count  movieId  imdbId  \\\n",
+       "0                    Toy Story           7.7        5415        1  114709   \n",
+       "1                      Jumanji           6.9        2413        2  113497   \n",
+       "2             Grumpier Old Men           6.5          92        3  113228   \n",
+       "3            Waiting to Exhale           6.1          34        4  114885   \n",
+       "4  Father of the Bride Part II           5.7         173        5  113041   \n",
+       "\n",
+       "    tmdbId  \n",
+       "0    862.0  \n",
+       "1   8844.0  \n",
+       "2  15602.0  \n",
+       "3  31357.0  \n",
+       "4  11862.0  "
+      ]
+     },
+     "execution_count": 12,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "\n",
+    "links_df = fetch_dataframe('links_small.csv')\n",
+    "movies_df = movies_df.merge(links_df, left_on='imdb_id', right_on='imdbId', how='inner')\n",
+    "movies_df.head()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "We'll want to move our SVD user vectors and movie vectors and their corresponding userId and movieId into 2 dataframes for later processing."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 13,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# build a dataframe out of the user vectors and their userIds\n",
+    "user_vectors_and_ids = {train_set.to_raw_uid(inner_id): user_vectors[inner_id].tolist() for inner_id in train_set.all_users()}\n",
+    "user_vector_df = pd.Series(user_vectors_and_ids).to_frame('user_vector')\n",
+    "\n",
+    "# now do the same for the movie vectors and their movieIds\n",
+    "movie_vectors_and_ids = {train_set.to_raw_iid(inner_id): movie_vectors[inner_id].tolist() for inner_id in train_set.all_items()}\n",
+    "movie_vector_df = pd.Series(movie_vectors_and_ids).to_frame('movie_vector')\n",
+    "\n",
+    "# movie_vector_df is indexed by the raw movieId from the ratings data, so join on 'movieId' (added from links_small.csv), not on the TMDB 'id' column\n",
+    "movies_df = movies_df.merge(movie_vector_df, left_on='movieId', right_index=True, how='inner')"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Querying Vectors\n",
+    "You can use Redis’ vector similarity search to find the most similar vectors once they’re stored.\n",
+    "\n",
+    "\n",
+    "\n",
+    "Once you've stored your vectors in Redis, querying for vector similarity becomes straightforward, especially if you're using RediSearch with vector support (such as HNSW). I'll guide you through setting up and querying for vector similarity.\n",
+    "\n",
+    "Query Setup\n",
+    "We'll assume:\n",
+    "\n",
+    "You've already created a vector index using the HNSW algorithm (or another vector indexing mechanism).\n",
+    "You've stored your user or item vectors in Redis, either as fields in a Redis hash or as direct vector fields for vector similarity searches.\n",
+    "Step-by-Step Guide for Querying Vector Similarity\n",
+    "1. Create a Vector Index (If not already created)\n",
+    "Before you can perform similarity queries, you need to create a vector index using the FT.CREATE command. This defines how vectors are indexed in Redis.\n",
+    "\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 14,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from redis import Redis\n",
+    "from redisvl.schema import IndexSchema\n",
+    "from redisvl.index import SearchIndex\n",
+    "\n",
+    "client = Redis.from_url(REDIS_URL)\n",
+    "\n",
+    "movie_schema = IndexSchema.from_yaml(\"collaborative_filtering_schema.yaml\")\n",
+    "\n",
+    "movie_index = SearchIndex(movie_schema, redis_client=client)\n",
+    "movie_index.create(overwrite=True, drop=True)\n",
+    "\n",
+    "user_schema = IndexSchema.from_yaml(\"user_schema.yaml\")\n",
+    "\n",
+    "user_index = SearchIndex(user_schema, redis_client=client)\n",
+    "user_index.create(overwrite=True, drop=True)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 15,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "keys = movie_index.load(movies_df.to_dict(orient='records'))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 16,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "1494\n",
+      "28386\n",
+      "1494\n",
+      "1482\n",
+      "9065\n"
+     ]
+    }
+   ],
+   "source": [
+    "number_of_movies = len(movies_df.to_dict(orient='records'))\n",
+    "size_of_movie_df = movies_df.size\n",
+    "\n",
+    "print(number_of_movies)\n",
+    "print(size_of_movie_df)\n",
+    "unique_movie_ids = movies_df['id'].nunique()\n",
+    "print(unique_movie_ids)\n",
+    "unique_movie_titles = movies_df['title'].nunique()\n",
+    "print(unique_movie_titles)\n",
+    "\n",
+    "unique_movies_rated = ratings_df['movieId'].nunique()\n",
+    "print(unique_movies_rated)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Unlike in content filtering, where we want to compute vector similarity between items and we use cosine distance between item vectors to do so, in collaborative filtering we instead try to compute the predicted rating a user will give to a movie by taking the inner product of the user and movie vectors.\n",
+    "\n",
+    "This is why in our `collaborative_filtering_schema.yaml` we use `ip` (inner product) as our distance metric.\n",
+    "\n",
+    "It's also why we'll use our user vector as the query vector when we do a vector query."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 24,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "{'id': 'movie:9df0babc731549909e929885973aee58', 'vector_distance': '-4.31072711945', 'title': 'The Million Dollar Hotel', 'genres': '[\"Drama\",\"Thriller\"]'}\n",
+      "{'id': 'movie:5d7079fac9534a0585608c9b0d01ba80', 'vector_distance': '-4.11799812317', 'title': \"Pandora's Box\", 'genres': '[\"Drama\",\"Thriller\",\"Romance\"]'}\n",
+      "{'id': 'movie:120d744065394c499f76589586051337', 'vector_distance': '-4.10946702957', 'title': \"Monsieur  Hulot's Holiday\", 'genres': '[\"Comedy\",\"Family\"]'}\n",
+      "{'id': 'movie:ae34e2e4c64147c994f3c5c3c29f5190', 'vector_distance': '-4.01828145981', 'title': 'Scarface', 'genres': '[\"Action\",\"Crime\",\"Drama\",\"Thriller\"]'}\n",
+      "{'id': 'movie:a1506eff623a4ba0a165a850e7250d3e', 'vector_distance': '-4.00052165985', 'title': 'The Thomas Crown Affair', 'genres': '[\"Romance\",\"Crime\",\"Thriller\",\"Drama\"]'}\n",
+      "{'id': 'movie:3dbfc6b11f374d649663c0365792a3d2', 'vector_distance': '-3.99500846863', 'title': 'Dead Man', 'genres': '[\"Drama\",\"Fantasy\",\"Western\"]'}\n",
+      "{'id': 'movie:4df54c2a6f674b85ab12ced9b6eddf3e', 'vector_distance': '-3.99404859543', 'title': 'True Romance', 'genres': '[\"Action\",\"Thriller\",\"Crime\",\"Romance\"]'}\n",
+      "{'id': 'movie:fef75d040f864cc4abeda72c3d7830b6', 'vector_distance': '-3.98610448837', 'title': 'Sunshine', 'genres': '[\"Science Fiction\",\"Thriller\"]'}\n",
+      "{'id': 'movie:e6b996ce1db7497da1595b1ffeba66c9', 'vector_distance': '-3.94609308243', 'title': 'The Sixth Sense', 'genres': '[\"Mystery\",\"Thriller\",\"Drama\"]'}\n",
+      "{'id': 'movie:38c0ee9c438242ed8347afefc2469ca5', 'vector_distance': '-3.94594812393', 'title': 'Zatoichi', 'genres': '[\"Adventure\",\"Drama\",\"Action\"]'}\n",
+      "{'id': 'movie:e6345ce2592a4807ae5929094f8871db', 'vector_distance': '-3.94536495209', 'title': 'Straw Dogs', 'genres': '[\"Crime\",\"Drama\",\"Thriller\",\"Mystery\"]'}\n",
+      "{'id': 'movie:679bbb2708e847dc8730a5bf87d10b95', 'vector_distance': '-3.93847608566', 'title': 'While You Were Sleeping', 'genres': '[\"Comedy\",\"Drama\",\"Romance\"]'}\n",
+      "{'id': 'movie:d88f066c1c1e4eedb4bbcad4be8e85cd', 'vector_distance': '-3.92860937119', 'title': 'Cold Mountain', 'genres': '[\"Drama\"]'}\n",
+      "{'id': 'movie:67fa2af9660549a0bab428e2cb4c5801', 'vector_distance': '-3.92834091187', 'title': 'The Good Shepherd', 'genres': '[\"Drama\",\"Thriller\",\"History\"]'}\n",
+      "{'id': 'movie:5bc1a44fdea4421880acc9a050bdf8be', 'vector_distance': '-3.90582227707', 'title': \"Charlie's Angels\", 'genres': '[\"Action\",\"Adventure\",\"Comedy\",\"Crime\",\"Thriller\"]'}\n",
+      "{'id': 'movie:f1459b204f054f8daa7bb03e349d4bc1', 'vector_distance': '-3.90310573578', 'title': 'Gremlins 2: The New Batch', 'genres': '[\"Comedy\",\"Horror\",\"Fantasy\"]'}\n",
+      "{'id': 'movie:d0d08cb2caa44b42a3f21af5687fb7dc', 'vector_distance': '-3.90276098251', 'title': 'Ghost Rider', 'genres': '[\"Thriller\",\"Action\",\"Fantasy\",\"Horror\"]'}\n",
+      "{'id': 'movie:0c27eb9238744640a318c57104df4ddb', 'vector_distance': '-3.90011119843', 'title': 'Once Were Warriors', 'genres': '[\"Drama\"]'}\n",
+      "{'id': 'movie:e0a34f0930fb44d5857dc8d75b05b985', 'vector_distance': '-3.89683389664', 'title': 'Hard Target', 'genres': '[\"Action\",\"Adventure\",\"Crime\",\"Thriller\"]'}\n",
+      "{'id': 'movie:0facbd81368840769a3b49dcbd479472', 'vector_distance': '-3.89002752304', 'title': 'Space Jam', 'genres': '[\"Animation\",\"Comedy\",\"Drama\",\"Family\",\"Fantasy\"]'}\n"
+     ]
+    }
+   ],
+   "source": [
+    "from redisvl.query import RangeQuery, FilterQuery\n",
+    "from redisvl.query.filter import Tag, Num, Text\n",
+    "\n",
+    "user_1_vector = user_vectors[20].tolist()\n",
+    "\n",
+    "# the distance metric 'ip' inner product is computing \"score = 1 - u * v\" and returning the minimum, which corresponds to the max of \"u * v\"\n",
+    "# this is what we want. The predicted rating on a scale of 0 to 5 is then -(score - 1) == -score + 1\n",
+    "query = RangeQuery(vector=user_1_vector,\n",
+    "                   vector_field_name='movie_vector',\n",
+    "                  num_results=20,\n",
+    "                  return_score=True,\n",
+    "                  return_fields=['title', 'genres']\n",
+    "                  )\n",
+    "\n",
+    "results = movie_index.query(query)\n",
+    "\n",
+    "for r in results:\n",
+    "    print(r)"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "redis-ai-res",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.11.9"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/python-recipes/recommendation-systems/collaborative_filtering_schema.yaml b/python-recipes/recommendation-systems/collaborative_filtering_schema.yaml
new file mode 100644
index 00000000..f10d686b
--- /dev/null
+++ b/python-recipes/recommendation-systems/collaborative_filtering_schema.yaml
@@ -0,0 +1,40 @@
+index:
+    name: movies
+    prefix: movie
+    storage_type: json
+
+fields:
+    - name: genres
+      type: tag
+    - name: movie_id
+      type: tag
+    - name: original_language
+      type: tag
+    - name: overview
+      type: text
+    - name: popularity
+      type: numeric
+    - name: release_date
+      type: numeric
+    - name: revenue
+      type: numeric
+    - name: runtime
+      type: numeric
+    - name: status
+      type: tag
+    - name: tagline
+      type: text
+    - name: title
+      type: text
+    - name: vote_average
+      type: numeric
+    - name: vote_count
+      type: numeric
+
+    - name: movie_vector
+      type: vector
+      attrs:
+          dims: 100
+          distance_metric: ip
+          algorithm: flat
+          dtype: float32
\ No newline at end of file
diff --git a/python-recipes/recommendation-systems/user_schema.yaml b/python-recipes/recommendation-systems/user_schema.yaml
new file mode 100644
index 00000000..95511038
--- /dev/null
+++ b/python-recipes/recommendation-systems/user_schema.yaml
@@ -0,0 +1,18 @@
+index:
+    name: users
+    prefix: user
+    storage_type: json
+
+fields:
+    - name: usr_id
+      type: tag
+      name: ratings
+      type: numeric
+
+    - name: user_vector
+      type: vector
+      attrs:
+          dims: 100
+          distance_metric: ip
+          algorithm: flat
+          dtype: float32
\ No newline at end of file

From b254c3391523daefb62b01e5a46fe6af85a849ba Mon Sep 17 00:00:00 2001
From: Justin Cechmanek <justin.cechmanek@redis.com>
Date: Wed, 9 Oct 2024 17:51:56 -0700
Subject: [PATCH 02/12] cleans up collab filtering notebook. updates user
 schema

---
 .../collaborative_filtering.ipynb             | 307 ++++++++----------
 .../recommendation-systems/user_schema.yaml   |   4 +-
 2 files changed, 143 insertions(+), 168 deletions(-)

diff --git a/python-recipes/recommendation-systems/collaborative_filtering.ipynb b/python-recipes/recommendation-systems/collaborative_filtering.ipynb
index 26a4de60..e4fdfdd3 100644
--- a/python-recipes/recommendation-systems/collaborative_filtering.ipynb
+++ b/python-recipes/recommendation-systems/collaborative_filtering.ipynb
@@ -13,10 +13,12 @@
    "source": [
     "Recommendation systems are a common application of machine learning and serve many industries from e-commerce to music streaming platforms.\n",
     "\n",
-    "There are many different architechtures that can be followed to build a recommender system. \n",
+    "There are many different architectures that can be followed to build a recommendation system.\n",
     "\n",
-    "In this notebook we'll demonstrate how to build a [content filtering](https://en.wikipedia.org/wiki/Recommender_system#:~:text=of%20hybrid%20systems.-,Content%2Dbased%20filtering,-%5Bedit%5D)\n",
-    "recommender and use the movies dataset as our example data."
+    "In this notebook we'll demonstrate how to build a [collaborative filtering](https://en.wikipedia.org/wiki/Collaborative_filtering)\n",
+    "recommendation system and use the large IMDB movies dataset as our example data.\n",
+    "\n",
+    "To generate our vectors we'll use the popular Python package [Surprise](https://surpriselib.com/)."
    ]
   },
   {
@@ -34,7 +36,6 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "## IMPORTS\n",
     "import os\n",
     "import requests\n",
     "import pandas as pd\n",
@@ -54,59 +55,11 @@
     "REDIS_URL = f\"redis://:{REDIS_PASSWORD}@{REDIS_HOST}:{REDIS_PORT}\""
    ]
   },
-  {
-   "cell_type": "code",
-   "execution_count": 3,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "\n",
-    "## EVALUATE MOVE TO COLLABORATIVE FILTERING SO WE CAN SHOW BETTER NUMBERS\n",
-    "#let's see how well this works. we can choose some users, and based on their first watched movie we can recommend them some more.\n",
-    "#we can then look at the set intersection between our recommendations and the movies they actually watched (and rated highly) to see how well we did."
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 4,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "## DONE\n",
-    "# clean up your index\n",
-    "\n",
-    "#while remaining := index.clear():\n",
-    "#    print(f\"Deleted {remaining} keys\")"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "# YOLO FTW"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "To build a collaborative filtering example using the Surprise library and the Movies dataset, we need to first load the data, format it according to the requirements of Surprise, and then apply a collaborative filtering algorithm like SVD.\n",
-    "\n",
-    "Since you mentioned a modified version of the dataset hosted on Kaggle, I’ll show you how to structure the code, assuming you have the dataset ready.\n",
-    "\n",
-    "Here’s an example:\n",
-    "\n",
-    "Step-by-Step Guide\n",
-    "Install necessary libraries: Ensure you have installed the Surprise library if you haven’t already."
-   ]
-  },
   {
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "Loading and Preparing the Data: Let’s assume the dataset contains at least two relevant files: ratings.csv (user, movie, rating) and movies.csv (movieId, title).\n",
-    "\n",
-    "You’ll need to load the ratings data and prepare it for use with Surprise."
+    "To build a collaborative filtering example using the Surprise library and the Movies dataset, we need to first load the data, format it according to the requirements of Surprise, and then apply a collaborative filtering algorithm like SVD."
    ]
   },
   {
@@ -151,41 +104,14 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "# Training Our Model"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 7,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "<surprise.prediction_algorithms.matrix_factorization.SVD at 0x13a767e50>"
-      ]
-     },
-     "execution_count": 7,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "# split the data into training and testing sets (80% train, 20% test)\n",
-    "train_set, test_set = train_test_split(ratings_data, test_size=0.2)\n",
-    "\n",
-    "# use SVD (Singular Value Decomposition) for collaborative filtering\n",
-    "svd_algo = SVD(biased=False)  # We'll set biased to False so that predictions are of the form \"rating_prediction = user_vector dot item_vector\"\n",
-    "\n",
-    "# train the algorithm on the train_set\n",
-    "svd_algo.fit(train_set)"
+    "# What is Collaborative Filtering"
    ]
   },
   {
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "A lot happened in the cell above. We split our full data into train and test sets. We defined the collaborative filtering algorithm to use, which in this case is the Singular Value Decomposition (SVD) algorithm. lastly, we fit our model to our data.\n",
+    "A lot is going to happen in the code cell below. We'll split our full data into train and test sets. We'll define the collaborative filtering algorithm to use, which in this case is the Singular Value Decomposition (SVD) algorithm. Lastly, we'll fit our model to our data.\n",
     "\n",
     "It's worth going into more detail why we chose this algorithm and what it is computing in the `.fit(train_set)` method we're calling.\n",
     "First, let's think about what data it's receiving - our ratings data. This only contains the user_ids, movie_ids, and the user's ratings of their watched movies on a scale of 1 to 5.\n",
@@ -201,7 +127,7 @@
     "| user_5 |         |    4    |    5    |    2    |         |         |         |\n",
     "| ...... |         |         |         |         |         |         |         |\n",
     "\n",
-    "Our empty cells aren't zero's their missing ratings, so `user_1` has never rated `movie_3`. They may like it or hate it."
+    "Our empty cells aren't zeros; they're missing ratings, so `user_1` has never rated `movie_3`. They may like it or hate it."
    ]
   },
   {
@@ -209,14 +135,14 @@
    "metadata": {},
    "source": [
     "Unlike Content Filtering, here we're only considering the ratings that users assign. We don't know the plot or genre or release year of any of these films.\n",
-    "But we can still build recommender by assuming that users have similar tastes to each other. As an intuitive example, we can see that `user_1` and `user_4` have very similar ratings on several movies, so we can assume that `user_4` will rate `movie_6` highly, just as `user_1` did. This is the idea behind collaborative filtering."
+    "But we can still build a recommender by assuming that users have similar tastes to each other. As an intuitive example, we can see that `user_1` and `user_4` have very similar ratings on several movies, so we will assume that `user_4` will rate `movie_6` highly, just as `user_1` did. This is the idea behind collaborative filtering."
    ]
   },
   {
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "That's the idea, but what about the math? Since we only have this matrix to work with what we want to do is decompose it into two constituent matrices.\n",
+    "That's the intuition, but what about the math? Since we only have this matrix to work with, what we want to do is decompose it into two constituent matrices.\n",
     "Lets call our ratings matrix `[R]`. We want to find two other matrices, a user matrix `[U]`, and a movies matrix `[M]` that fit the equation:\n",
     "\n",
     "`[U] * [M] = [R]`\n",
@@ -240,92 +166,116 @@
     "| movie_1_feature_k | movie_2_feature_k | movie_3_feature_k | ... | movie_M_feature_k |\n",
     "\n",
     "\n",
-    "these features are called the latent features and are the values we're trying to find when we call the `.fit(training_data)` method. The algorithm that computes these features from our ratings matrix is the SVD algorithm."
+    "These features are called the latent features (or latent factors) and are the values we're trying to find when we call the `svd_algo.fit(train_set)` method. The algorithm that computes these features from our ratings matrix is the SVD algorithm. The number of users and movies is set by our data. The size of the latent feature vectors `k` is a parameter we choose. We'll keep it at the default 100 for this notebook."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "<surprise.prediction_algorithms.matrix_factorization.SVD at 0x13a767e50>"
+      ]
+     },
+     "execution_count": 7,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# split the data into training and testing sets (80% train, 20% test)\n",
+    "train_set, test_set = train_test_split(ratings_data, test_size=0.2)\n",
+    "\n",
+    "# use SVD (Singular Value Decomposition) for collaborative filtering\n",
+    "svd_algo = SVD(n_factors=100, biased=False)  # We'll set biased to False so that predictions are of the form \"rating_prediction = user_vector dot item_vector\"\n",
+    "\n",
+    "# train the algorithm on the train_set\n",
+    "svd_algo.fit(train_set)"
    ]
   },
   {
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "Explanation:\n",
-    "Dataset Preparation: We load the ratings data and ensure it has the necessary format with userId, movieId, and rating columns.\n",
-    "Surprise Reader: This helps in specifying the format of the data.\n",
-    "SVD Algorithm: We use the SVD algorithm for collaborative filtering. It decomposes the user-item interaction matrix into the latent factors.\n",
-    "Accuracy: After training the model, we evaluate it using the RMSE (Root Mean Squared Error).\n",
-    "Next Steps:\n",
-    "You can experiment with different algorithms such as KNNBasic or NMF in the Surprise library.\n",
-    "If your dataset contains titles, you can join movies.csv to display movie names in recommendations.\n",
-    "Would you like more details on dataset preprocessing or any specific functionality in collaborative filtering?"
+    "## Extracting The User and Movie Vectors"
    ]
   },
   {
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "To extract user and item (movie) vectors from an SVD model trained using Surprise and then store them in a Redis vector database, you'll need to:\n",
-    "\n",
-    "Extract the learned latent factors (user and item vectors) from the SVD model.\n",
-    "Use Redis-py (Python client for Redis) to store those vectors in Redis as vectors in a vector database (assuming you are using Redis with the RedisVector or RedisSearch modules).\n",
-    "Here’s how to do it:\n",
-    "\n",
-    "Step 1: Extract User and Item Vectors from the SVD Model\n",
-    "The Surprise SVD model stores user and item vectors (latent factors) in two attributes:\n",
+    "Now that the SVD algorithm has computed our `[U]` and `[M]` matrices - which are both really just lists of vectors - we can load them into our Redis instance.\n",
     "\n",
-    "algo.pu: user factors matrix (a matrix where each row corresponds to the latent factors of a user).\n",
-    "algo.qi: item factors matrix (a matrix where each row corresponds to the latent factors of an item/movie).\n",
-    "These matrices store the vectors in the latent space after training.\n",
+    "The Surprise SVD model stores user and movie vectors in two attributes:\n",
     "\n",
-    "Step 2: Save the Vectors in Redis\n",
-    "Redis stores vectors in vector databases, such as Redis' HNSW index for vector similarity search. You can store both user and movie vectors as hashes in Redis and then use them for similarity search or recommendations.\n",
+    "- `svd_algo.pu`: user features matrix (a matrix where each row corresponds to the latent features of a user).\n",
+    "- `svd_algo.qi`: item features matrix (a matrix where each row corresponds to the latent features of an item/movie).\n",
     "\n",
-    "Install Redis and Redis-py\n",
-    "Make sure you have Redis installed with vector support (RediSearch or RedisVL), and install the Redis-py package:"
+    "It's worth noting that the matrix `svd_algo.qi` is the transpose of the matrix `[M]` we defined above. This way each row corresponds to one movie."
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 8,
+   "execution_count": 29,
    "metadata": {},
    "outputs": [
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "(671, 100)\n",
-      "(8405, 100)\n"
+      "we have 671 users with feature vectors of size 100\n",
+      "we have 8405 movies with feature vectors of size 100\n"
      ]
     }
    ],
    "source": [
-    "# step 1: extract vectors\n",
     "user_vectors = svd_algo.pu  # user latent features (matrix)\n",
     "movie_vectors = svd_algo.qi  # movie latent features (matrix)\n",
     "\n",
-    "print(user_vectors.shape)\n",
-    "print(movie_vectors.shape)"
+    "print(f'we have {user_vectors.shape[0]} users with feature vectors of size {user_vectors.shape[1]}')\n",
+    "print(f'we have {movie_vectors.shape[0]} movies with feature vectors of size {movie_vectors.shape[1]}')"
    ]
   },
   {
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "Explanation:\n",
-    "Extract Vectors:\n",
+    "# Predicting User Ratings\n",
+    "The great thing about collaborative filtering is that using our user and movie vectors we can predict the rating any user will give to any movie in our dataset.\n",
+    "And unlike content filtering, there is no assumption that all the movies a user will be recommended are similar to each other. A user can be recommended dark horror films and light-hearted animations.\n",
     "\n",
-    "algo.pu gives you a matrix where each row corresponds to a user’s latent factors (user vector).\n",
-    "algo.qi gives you a matrix where each row corresponds to an item/movie’s latent factors (item vector).\n",
-    "Store in Redis:\n",
+    "Looking back at our SVD algorithm, the equation is `[User_features] * [Movie_features].transpose = [Ratings]`.\n",
+    "So to get a prediction of what a user will rate a movie they haven't seen yet we just need to take the dot product of that user's feature vector and a movie's feature vector."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 30,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "the predicted rating of user 347 on movie 5515 is 1.4150893670982523\n"
+     ]
+    }
+   ],
+   "source": [
+    "# predict one user's rating of one film\n",
+    "# surprise casts userId and movieId to inner ids\n",
     "\n",
-    "We store each vector under a unique Redis key (e.g., user:123, item:456).\n",
-    "The vector is stored as a hash in Redis with each dimension (dim_0, dim_1, etc.) being a field in the hash.\n",
-    "Step 3: Advanced Storage for Vector Similarity Search\n",
-    "If you want to store the vectors in a Redis vector search index (e.g., HNSW from RedisSearch for vector similarity queries), you would follow the Redis commands for indexing:\n",
-    "\n"
+    "inner_uid = train_set.to_inner_uid(347) # user_id\n",
+    "inner_iid = train_set.to_inner_iid(5515) # movie_id\n",
+    "print(f'the predicted rating of user {347} on movie {5515} is {np.dot(user_vectors[inner_uid], movie_vectors[inner_iid])}')\n"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 9,
+   "execution_count": 31,
    "metadata": {},
    "outputs": [
     {
@@ -342,17 +292,18 @@
        "Prediction(uid=347, iid=5515, r_ui=None, est=1.4150893670982523, details={'was_impossible': False})"
       ]
      },
-     "execution_count": 9,
+     "execution_count": 31,
      "metadata": {},
      "output_type": "execute_result"
     }
    ],
    "source": [
+    "# sanity check my math matches Surprise package math\n",
     "print(svd_algo.predict(347, 5515))\n",
     "\n",
     "inner_uid = train_set.to_inner_uid(347)\n",
     "inner_iid = train_set.to_inner_iid(5515)\n",
-    "print(np.dot(user_vectors[inner_uid], movie_vectors[inner_iid])) # surpirse casts userId and movieId to inner ids\n",
+    "print(np.dot(user_vectors[inner_uid], movie_vectors[inner_iid])) # surprise casts userId and movieId to inner ids\n",
     "svd_algo.predict(347, 5515)"
    ]
   },
@@ -924,22 +875,10 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "Querying Vectors\n",
-    "You can use Redis’ vector similarity search to find the most similar vectors once they’re stored.\n",
-    "\n",
-    "\n",
-    "\n",
-    "Once you've stored your vectors in Redis, querying for vector similarity becomes straightforward, especially if you're using RediSearch with vector support (such as HNSW). I'll guide you through setting up and querying for vector similarity.\n",
+    "## RedisVL Handles the Scale\n",
     "\n",
-    "Query Setup\n",
-    "We'll assume:\n",
-    "\n",
-    "You've already created a vector index using the HNSW algorithm (or another vector indexing mechanism).\n",
-    "You've stored your user or item vectors in Redis, either as fields in a Redis hash or as direct vector fields for vector similarity searches.\n",
-    "Step-by-Step Guide for Querying Vector Similarity\n",
-    "1. Create a Vector Index (If not already created)\n",
-    "Before you can perform similarity queries, you need to create a vector index using the FT.CREATE command. This defines how vectors are indexed in Redis.\n",
-    "\n"
+    "Especially for large datasets like the 45,000 movie catalog we're dealing with, you'll want Redis to do the heavy lifting of vector search.\n",
+    "All that's needed is to define the search index and load our data we've cleaned and merged with our vectors.\n"
    ]
   },
   {
@@ -992,6 +931,7 @@
     }
    ],
    "source": [
+    "# sanity check I merged all my dataframes properly and have the right sizes of moives, users, vectors, ids, etc.\n",
     "number_of_movies = len(movies_df.to_dict(orient='records'))\n",
     "size_of_movie_df = movies_df.size\n",
     "\n",
@@ -1019,33 +959,33 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 24,
+   "execution_count": 26,
    "metadata": {},
    "outputs": [
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "{'id': 'movie:9df0babc731549909e929885973aee58', 'vector_distance': '-4.31072711945', 'title': 'The Million Dollar Hotel', 'genres': '[\"Drama\",\"Thriller\"]'}\n",
-      "{'id': 'movie:5d7079fac9534a0585608c9b0d01ba80', 'vector_distance': '-4.11799812317', 'title': \"Pandora's Box\", 'genres': '[\"Drama\",\"Thriller\",\"Romance\"]'}\n",
-      "{'id': 'movie:120d744065394c499f76589586051337', 'vector_distance': '-4.10946702957', 'title': \"Monsieur  Hulot's Holiday\", 'genres': '[\"Comedy\",\"Family\"]'}\n",
-      "{'id': 'movie:ae34e2e4c64147c994f3c5c3c29f5190', 'vector_distance': '-4.01828145981', 'title': 'Scarface', 'genres': '[\"Action\",\"Crime\",\"Drama\",\"Thriller\"]'}\n",
-      "{'id': 'movie:a1506eff623a4ba0a165a850e7250d3e', 'vector_distance': '-4.00052165985', 'title': 'The Thomas Crown Affair', 'genres': '[\"Romance\",\"Crime\",\"Thriller\",\"Drama\"]'}\n",
-      "{'id': 'movie:3dbfc6b11f374d649663c0365792a3d2', 'vector_distance': '-3.99500846863', 'title': 'Dead Man', 'genres': '[\"Drama\",\"Fantasy\",\"Western\"]'}\n",
-      "{'id': 'movie:4df54c2a6f674b85ab12ced9b6eddf3e', 'vector_distance': '-3.99404859543', 'title': 'True Romance', 'genres': '[\"Action\",\"Thriller\",\"Crime\",\"Romance\"]'}\n",
-      "{'id': 'movie:fef75d040f864cc4abeda72c3d7830b6', 'vector_distance': '-3.98610448837', 'title': 'Sunshine', 'genres': '[\"Science Fiction\",\"Thriller\"]'}\n",
-      "{'id': 'movie:e6b996ce1db7497da1595b1ffeba66c9', 'vector_distance': '-3.94609308243', 'title': 'The Sixth Sense', 'genres': '[\"Mystery\",\"Thriller\",\"Drama\"]'}\n",
-      "{'id': 'movie:38c0ee9c438242ed8347afefc2469ca5', 'vector_distance': '-3.94594812393', 'title': 'Zatoichi', 'genres': '[\"Adventure\",\"Drama\",\"Action\"]'}\n",
-      "{'id': 'movie:e6345ce2592a4807ae5929094f8871db', 'vector_distance': '-3.94536495209', 'title': 'Straw Dogs', 'genres': '[\"Crime\",\"Drama\",\"Thriller\",\"Mystery\"]'}\n",
-      "{'id': 'movie:679bbb2708e847dc8730a5bf87d10b95', 'vector_distance': '-3.93847608566', 'title': 'While You Were Sleeping', 'genres': '[\"Comedy\",\"Drama\",\"Romance\"]'}\n",
-      "{'id': 'movie:d88f066c1c1e4eedb4bbcad4be8e85cd', 'vector_distance': '-3.92860937119', 'title': 'Cold Mountain', 'genres': '[\"Drama\"]'}\n",
-      "{'id': 'movie:67fa2af9660549a0bab428e2cb4c5801', 'vector_distance': '-3.92834091187', 'title': 'The Good Shepherd', 'genres': '[\"Drama\",\"Thriller\",\"History\"]'}\n",
-      "{'id': 'movie:5bc1a44fdea4421880acc9a050bdf8be', 'vector_distance': '-3.90582227707', 'title': \"Charlie's Angels\", 'genres': '[\"Action\",\"Adventure\",\"Comedy\",\"Crime\",\"Thriller\"]'}\n",
-      "{'id': 'movie:f1459b204f054f8daa7bb03e349d4bc1', 'vector_distance': '-3.90310573578', 'title': 'Gremlins 2: The New Batch', 'genres': '[\"Comedy\",\"Horror\",\"Fantasy\"]'}\n",
-      "{'id': 'movie:d0d08cb2caa44b42a3f21af5687fb7dc', 'vector_distance': '-3.90276098251', 'title': 'Ghost Rider', 'genres': '[\"Thriller\",\"Action\",\"Fantasy\",\"Horror\"]'}\n",
-      "{'id': 'movie:0c27eb9238744640a318c57104df4ddb', 'vector_distance': '-3.90011119843', 'title': 'Once Were Warriors', 'genres': '[\"Drama\"]'}\n",
-      "{'id': 'movie:e0a34f0930fb44d5857dc8d75b05b985', 'vector_distance': '-3.89683389664', 'title': 'Hard Target', 'genres': '[\"Action\",\"Adventure\",\"Crime\",\"Thriller\"]'}\n",
-      "{'id': 'movie:0facbd81368840769a3b49dcbd479472', 'vector_distance': '-3.89002752304', 'title': 'Space Jam', 'genres': '[\"Animation\",\"Comedy\",\"Drama\",\"Family\",\"Fantasy\"]'}\n"
+      "{'id': 'movie:9df0babc731549909e929885973aee58', 'vector_distance': '-3.6087179184', 'title': 'The Million Dollar Hotel', 'genres': '[\"Drama\",\"Thriller\"]'}\n",
+      "{'id': 'movie:ad9142a30fd045cfbb1e5ae0b051f4c8', 'vector_distance': '-3.56296348572', 'title': 'Terminator 3: Rise of the Machines', 'genres': '[\"Action\",\"Thriller\",\"Science Fiction\"]'}\n",
+      "{'id': 'movie:bf3354ae719b44c782876272c5f95ce1', 'vector_distance': '-3.52630567551', 'title': 'Beverly Hills Cop III', 'genres': '[\"Action\",\"Comedy\",\"Crime\"]'}\n",
+      "{'id': 'movie:88ce7f7738104c539b004135f306e9ec', 'vector_distance': '-3.46648168564', 'title': 'Backdraft', 'genres': '[\"Action\",\"Crime\",\"Drama\",\"Mystery\",\"Thriller\"]'}\n",
+      "{'id': 'movie:9fe2e20d887a4263a540f3945f10751d', 'vector_distance': '-3.40900659561', 'title': 'Boogie Nights', 'genres': '[\"Drama\"]'}\n",
+      "{'id': 'movie:9cd57bacb5de437a88279a17c2161ce2', 'vector_distance': '-3.38699388504', 'title': 'The Good Thief', 'genres': '[\"Crime\",\"Drama\",\"Thriller\"]'}\n",
+      "{'id': 'movie:5d7079fac9534a0585608c9b0d01ba80', 'vector_distance': '-3.3247923851', 'title': \"Pandora's Box\", 'genres': '[\"Drama\",\"Thriller\",\"Romance\"]'}\n",
+      "{'id': 'movie:fa9cf76285c348078bb9814fa6b9357f', 'vector_distance': '-3.31738758087', 'title': 'Dawn of the Dead', 'genres': '[\"Horror\"]'}\n",
+      "{'id': 'movie:0c27eb9238744640a318c57104df4ddb', 'vector_distance': '-3.309486866', 'title': 'Once Were Warriors', 'genres': '[\"Drama\"]'}\n",
+      "{'id': 'movie:f490c86a71164bf3956d92be5de8ba05', 'vector_distance': '-3.30277919769', 'title': 'Light of Day', 'genres': '[\"Music\",\"Drama\"]'}\n",
+      "{'id': 'movie:e8ef474819814eaea6cb757449d3eded', 'vector_distance': '-3.28701210022', 'title': 'Beetlejuice', 'genres': '[\"Fantasy\",\"Comedy\"]'}\n",
+      "{'id': 'movie:0c596d8911e0498a854b4e6d5faae545', 'vector_distance': '-3.27935218811', 'title': 'Enough', 'genres': '[\"Drama\",\"Thriller\"]'}\n",
+      "{'id': 'movie:d0d08cb2caa44b42a3f21af5687fb7dc', 'vector_distance': '-3.26998329163', 'title': 'Ghost Rider', 'genres': '[\"Thriller\",\"Action\",\"Fantasy\",\"Horror\"]'}\n",
+      "{'id': 'movie:b5bcd6dba0474b709ccb3b10c3e2fb14', 'vector_distance': '-3.26271867752', 'title': 'Cousin, Cousine', 'genres': '[\"Romance\",\"Comedy\"]'}\n",
+      "{'id': 'movie:5726b3476a2d450db8792217298b7b57', 'vector_distance': '-3.25473356247', 'title': \"We're No Angels\", 'genres': '[\"Comedy\",\"Crime\",\"Drama\"]'}\n",
+      "{'id': 'movie:f1459b204f054f8daa7bb03e349d4bc1', 'vector_distance': '-3.25318956375', 'title': 'Gremlins 2: The New Batch', 'genres': '[\"Comedy\",\"Horror\",\"Fantasy\"]'}\n",
+      "{'id': 'movie:1cb9f1fb5d3f45a3861607aca03dfd4d', 'vector_distance': '-3.20173215866', 'title': 'Sleepless in Seattle', 'genres': '[\"Comedy\",\"Drama\",\"Romance\"]'}\n",
+      "{'id': 'movie:a567595136ef488b90637b77afb10664', 'vector_distance': '-3.19421386719', 'title': 'Point Break', 'genres': '[\"Action\",\"Thriller\",\"Crime\"]'}\n",
+      "{'id': 'movie:776e7891798048629d2dfa532ace8ff5', 'vector_distance': '-3.1903834343', 'title': \"My Best Friend's Wedding\", 'genres': '[\"Comedy\",\"Romance\"]'}\n",
+      "{'id': 'movie:6bcc04c814d24da6926161c9e0c10a76', 'vector_distance': '-3.17692661285', 'title': 'Cool Hand Luke', 'genres': '[\"Crime\",\"Drama\"]'}\n"
      ]
     }
    ],
@@ -1053,11 +993,11 @@
     "from redisvl.query import RangeQuery, FilterQuery\n",
     "from redisvl.query.filter import Tag, Num, Text\n",
     "\n",
-    "user_1_vector = user_vectors[20].tolist()\n",
+    "user_vector = user_vectors[352].tolist()\n",
     "\n",
     "# the distance metric 'ip' inner product is computing \"score = 1 - u * v\" and returning the minimum, which corresponds to the max of \"u * v\"\n",
     "# this is what we want. The predicted rating on a scale of 0 to 5 is then -(score - 1) == -score + 1\n",
-    "query = RangeQuery(vector=user_1_vector,\n",
+    "query = RangeQuery(vector=user_vector,\n",
     "                   vector_field_name='movie_vector',\n",
     "                  num_results=20,\n",
     "                  return_score=True,\n",
@@ -1069,6 +1009,39 @@
     "for r in results:\n",
     "    print(r)"
    ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "\n",
+    "## EVALUATE MOVE TO COLLABORATIVE FILTERING SO WE CAN SHOW BETTER NUMBERS\n",
+    "#let's see how well this works. we can choose some users, and based on their first watched movie we can recommend them some more.\n",
+    "#we can then look at the set intersection between our recommendations and the movies they actually watched (and rated highly) to see how well we did."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# TODO use bloom filter and/or cuckoo filter with the recommendations and user's watched_list in their index to filter out movies they already watched"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# clean up your index\n",
+    "\n",
+    "while remaining := movie_index.clear():\n",
+    "    print(f\"Deleted {remaining} keys\")"
+   ]
   }
  ],
  "metadata": {
diff --git a/python-recipes/recommendation-systems/user_schema.yaml b/python-recipes/recommendation-systems/user_schema.yaml
index 95511038..e89bd6a0 100644
--- a/python-recipes/recommendation-systems/user_schema.yaml
+++ b/python-recipes/recommendation-systems/user_schema.yaml
@@ -6,8 +6,10 @@ index:
 fields:
     - name: usr_id
       type: tag
-      name: ratings
+    - name: ratings
       type: numeric
+      name: watched_list
+      type: text
 
     - name: user_vector
       type: vector

From 9778b0abd6e2693f76452d7ebcc166a1e8d5fe22 Mon Sep 17 00:00:00 2001
From: Justin Cechmanek <justin.cechmanek@redis.com>
Date: Thu, 10 Oct 2024 20:58:44 -0700
Subject: [PATCH 03/12] fixes dataframe merging on movie ids

---
 .../collaborative_filtering.ipynb             | 398 +++++++++---------
 .../recommendation-systems/user_schema.yaml   |   2 +-
 2 files changed, 211 insertions(+), 189 deletions(-)

diff --git a/python-recipes/recommendation-systems/collaborative_filtering.ipynb b/python-recipes/recommendation-systems/collaborative_filtering.ipynb
index e4fdfdd3..9977d2ef 100644
--- a/python-recipes/recommendation-systems/collaborative_filtering.ipynb
+++ b/python-recipes/recommendation-systems/collaborative_filtering.ipynb
@@ -4,7 +4,11 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "# Collaborative Filtering in RedisVL"
+    "![Redis](https://redis.io/wp-content/uploads/2024/04/Logotype.svg?auto=webp&quality=85,75&width=120)\n",
+    "\n",
+    "# Collaborative Filtering in RedisVL\n",
+    "\n",
+    "<a href=\"https://colab.research.google.com/github/redis-developer/redis-ai-resources/blob/main/python-recipes/recommendation-systems/collaborative_filtering.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
    ]
   },
   {
@@ -23,16 +27,17 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 1,
+   "execution_count": 274,
    "metadata": {},
    "outputs": [],
    "source": [
+    "# NBVAL_SKIP\n",
     "!pip install scikit-surprise --quiet"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 2,
+   "execution_count": 275,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -64,7 +69,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 5,
+   "execution_count": 276,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -84,7 +89,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 6,
+   "execution_count": 277,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -113,7 +118,7 @@
    "source": [
     "A lot is going to happen in the code cell below. We split our full data into train and test sets. We defined the collaborative filtering algorithm to use, which in this case is the Singular Value Decomposition (SVD) algorithm. lastly, we fit our model to our data.\n",
     "\n",
-    "It's worth going into more detail why we chose this algorithm and what it is computing in the `.fit(train_set)` method we're calling.\n",
+    "It's worth going into more detail why we chose this algorithm and what it is computing in the `svd.fit(train_set)` method we're calling.\n",
     "First, let's think about what data it's receiving - our ratings data. This only contains the user_ids, movie_ids, and the user's ratings of their watched movies on a scale of 1 to 5.\n",
     "\n",
     "We can put this data into a matrix with rows being users and columns being movies\n",
@@ -134,15 +139,15 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "Unlike Content Filtering, here we're only considering the ratings that users assign. We don't know the plot or genre or release year of any of these films.\n",
-    "But we can still build recommender by assuming that users have similar tastes to each other. As an intuitive example, we can see that `user_1` and `user_4` have very similar ratings on several movies, so we will assume that `user_4` will rate `movie_6` highly, just as `user_1` did. This is the idea behind collaborative filtering."
+    "Unlike Content Filtering, here we're only considering the ratings that users assign. We don't know the plot or genre or release year of any of these films. We don't even know the title.\n",
+    "But we can still build a recommender by assuming that users have similar tastes to each other. As an intuitive example, we can see that `user_1` and `user_4` have very similar ratings on several movies, so we will assume that `user_4` will rate `movie_6` highly, just as `user_1` did. This is the idea behind collaborative filtering."
    ]
   },
   {
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "That's the intuition, but what about the math? Since we only have this matrix to work with what we want to do is decompose it into two constituent matrices.\n",
+    "That's the intuition, but what about the math? Since we only have this matrix to work with, what we want to do is decompose it into two constituent matrices.\n",
     "Lets call our ratings matrix `[R]`. We want to find two other matrices, a user matrix `[U]`, and a movies matrix `[M]` that fit the equation:\n",
     "\n",
     "`[U] * [M] = [R]`\n",
@@ -166,21 +171,21 @@
     "| movie_1_feature_k | movie_2_feature_k | movie_3_feature_k | ... | movie_M_feature_k |\n",
     "\n",
     "\n",
-    "these features are called the latent features (or latent factors) and are the values we're trying to find when we call the `svd_algo.fit(training_data)` method. The algorithm that computes these features from our ratings matrix is the SVD algorithm. The number of users and movies is set by our data. The size of the latent feature vectors `k` is a parameter we choose. We'll keep it at the default 100 for this notebook."
+    "these features are called the latent features (or latent factors) and are the values we're trying to find when we call the `svd.fit(training_data)` method. The algorithm that computes these features from our ratings matrix is the SVD algorithm. The number of users and movies is set by our data. The size of the latent feature vectors `k` is a parameter we choose. We'll keep it at the default 100 for this notebook."
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 7,
+   "execution_count": 278,
    "metadata": {},
    "outputs": [
     {
      "data": {
       "text/plain": [
-       "<surprise.prediction_algorithms.matrix_factorization.SVD at 0x13a767e50>"
+       "<surprise.prediction_algorithms.matrix_factorization.SVD at 0x33873df10>"
       ]
      },
-     "execution_count": 7,
+     "execution_count": 278,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -190,10 +195,10 @@
     "train_set, test_set = train_test_split(ratings_data, test_size=0.2)\n",
     "\n",
     "# use SVD (Singular Value Decomposition) for collaborative filtering\n",
-    "svd_algo = SVD(n_factors=100, biased=False)  # We'll set biased to False so that predictions are of the form \"rating_prediction = user_vector dot item_vector\"\n",
+    "svd = SVD(n_factors=100, biased=False)  # We'll set biased to False so that predictions are of the form \"rating_prediction = user_vector dot item_vector\"\n",
     "\n",
     "# train the algorithm on the train_set\n",
-    "svd_algo.fit(train_set)"
+    "svd.fit(train_set)"
    ]
   },
   {
@@ -211,15 +216,15 @@
     "\n",
     "The Surprise SVD model stores user and movie vectors in two attributes:\n",
     "\n",
-    "`algo.pu`: user features matrix (a matrix where each row corresponds to the latent features of a user).\n",
-    "`algo.qi`: item features matrix (a matrix where each row corresponds to the latent features of an item/movie).\n",
+    "`svd.pu`: user features matrix (a matrix where each row corresponds to the latent features of a user).\n",
+    "`svd.qi`: item features matrix (a matrix where each row corresponds to the latent features of an item/movie).\n",
     "\n",
-    "It's worth noting that the matrix `algo.qi` is the transpose of the matrix `[M]` we defined above. This way each row corresponds to one movie"
+    "It's worth noting that the matrix `svd.qi` is the transpose of the matrix `[M]` we defined above. This way each row corresponds to one movie."
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 29,
+   "execution_count": 279,
    "metadata": {},
    "outputs": [
     {
@@ -227,13 +232,13 @@
      "output_type": "stream",
      "text": [
       "we have 671 users with feature vectors of size 100\n",
-      "we have 8405 movies with feature vectors of size 100\n"
+      "we have 8435 movies with feature vectors of size 100\n"
      ]
     }
    ],
    "source": [
-    "user_vectors = svd_algo.pu  # user latent features (matrix)\n",
-    "movie_vectors = svd_algo.qi  # movie latent features (matrix)\n",
+    "user_vectors = svd.pu  # user latent features (matrix)\n",
+    "movie_vectors = svd.qi  # movie latent features (matrix)\n",
     "\n",
     "print(f'we have {user_vectors.shape[0]} users with feature vectors of size {user_vectors.shape[1]}')\n",
     "print(f'we have {movie_vectors.shape[0]} movies with feature vectors of size {movie_vectors.shape[1]}')"
@@ -253,64 +258,55 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 30,
+   "execution_count": 280,
    "metadata": {},
    "outputs": [
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "the predicted rating of user 347 on movie 5515 is 1.4150893670982523\n"
+      "the predicted rating of user 347 on movie 5515 is 1.83012299501901\n"
      ]
     }
    ],
    "source": [
-    "# predict one user's rating of one film\n",
-    "# surprise casts userId and movieId to inner ids\n",
-    "\n",
+    "# surprise casts userId and movieId to inner ids, so we have to use their mapping to know which rows to use\n",
     "inner_uid = train_set.to_inner_uid(347) # user_id\n",
     "inner_iid = train_set.to_inner_iid(5515) # movie_id\n",
-    "print(f'the predicted rating of user {347} on movie {5515} is {np.dot(user_vectors[inner_uid], movie_vectors[inner_iid])}')\n"
+    "\n",
+    "# predict one user's rating of one film\n",
+    "predicted_rating = np.dot(user_vectors[inner_uid], movie_vectors[inner_iid])\n",
+    "print(f'the predicted rating of user {347} on movie {5515} is {predicted_rating}')"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 31,
+   "execution_count": 281,
    "metadata": {},
    "outputs": [
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "user: 347        item: 5515       r_ui = None   est = 1.42   {'was_impossible': False}\n",
-      "1.4150893670982523\n"
+      "user: 347        item: 5515       r_ui = None   est = 1.83   {'was_impossible': False}\n",
+      "1.83012299501901\n"
      ]
-    },
-    {
-     "data": {
-      "text/plain": [
-       "Prediction(uid=347, iid=5515, r_ui=None, est=1.4150893670982523, details={'was_impossible': False})"
-      ]
-     },
-     "execution_count": 31,
-     "metadata": {},
-     "output_type": "execute_result"
     }
    ],
    "source": [
     "# sanity check my math matches Surprise package math\n",
-    "print(svd_algo.predict(347, 5515))\n",
+    "print(svd.predict(347, 5515))\n",
     "\n",
     "inner_uid = train_set.to_inner_uid(347)\n",
     "inner_iid = train_set.to_inner_iid(5515)\n",
-    "print(np.dot(user_vectors[inner_uid], movie_vectors[inner_iid])) # surprise casts userId and movieId to inner ids\n",
-    "svd_algo.predict(347, 5515)"
+    "print(np.dot(user_vectors[inner_uid], movie_vectors[inner_iid])) # surprise casts userId and movieId to inner ids"
    ]
   },
   {
    "cell_type": "markdown",
    "metadata": {},
    "source": [
+    "## Adding Movie Data\n",
     "while our collaborative filtering algorithm was trained solely on user's ratings of movies, and doesn't require any data about the movies themselves - like the title, genres, or release year - we'll want that information stored as metadata.\n",
     "\n",
     "We can grab this data from our `movies_metadata.csv` file, clean it, and join it to our user ratings via the `movieId` column"
@@ -318,7 +314,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 10,
+   "execution_count": 282,
    "metadata": {},
    "outputs": [
     {
@@ -558,7 +554,7 @@
        "[5 rows x 23 columns]"
       ]
      },
-     "execution_count": 10,
+     "execution_count": 282,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -570,7 +566,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 11,
+   "execution_count": 283,
    "metadata": {},
    "outputs": [
     {
@@ -594,7 +590,7 @@
        "dtype: int64"
       ]
      },
-     "execution_count": 11,
+     "execution_count": 283,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -629,16 +625,113 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "We'll eventually have to map these movies to their ratings, which we'll do so with the `links.csv` file that matches `movieId`, `imdbId`, and `tmdbId`.\n",
-    "\n",
+    "We'll have to map these movies to their ratings, which we'll do with the `links.csv` file that matches `movieId`, `imdbId`, and `tmdbId`.\n",
     "Let's do that now."
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 12,
+   "execution_count": 284,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "\n",
+    "links_df = fetch_dataframe('links_small.csv')\n",
+    "movies_df = movies_df.merge(links_df, left_on='imdb_id', right_on='imdbId', how='inner')"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "We'll want to move our SVD user vectors and movie vectors and their corresponding userId and movieId into 2 dataframes for later processing."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 285,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# build a dataframe out of the user vectors and their userIds\n",
+    "user_vectors_and_ids = {train_set.to_raw_uid(inner_id): user_vectors[inner_id].tolist() for inner_id in train_set.all_users()}\n",
+    "user_vector_df = pd.Series(user_vectors_and_ids).to_frame('user_vector')\n",
+    "\n",
+    "# now do the same for the movie vectors and their movieIds\n",
+    "movie_vectors_and_ids = {train_set.to_raw_iid(inner_id): movie_vectors[inner_id].tolist() for inner_id in train_set.all_items()}\n",
+    "movie_vector_df = pd.Series(movie_vectors_and_ids).to_frame('movie_vector')\n",
+    "\n",
+    "# merge the movie vector series with the movies dataframe using the movieId and id fields\n",
+    "movies_df = movies_df.merge(movie_vector_df, left_on='movieId', right_index=True, how='inner')\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## RedisVL Handles the Scale\n",
+    "\n",
+    "Especially for large datasets like the 45,000 movie catalog we're dealing with, you'll want Redis to do the heavy lifting of vector search.\n",
+    "All that's needed is to define the search index and load our data we've cleaned and merged with our vectors.\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 286,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "20:56:23 redisvl.index.index INFO   Index already exists, overwriting.\n",
+      "20:56:23 redisvl.index.index INFO   Index already exists, overwriting.\n"
+     ]
+    }
+   ],
+   "source": [
+    "from redis import Redis\n",
+    "from redisvl.schema import IndexSchema\n",
+    "from redisvl.index import SearchIndex\n",
+    "\n",
+    "client = Redis.from_url(REDIS_URL)\n",
+    "\n",
+    "movie_schema = IndexSchema.from_yaml(\"collaborative_filtering_schema.yaml\")\n",
+    "\n",
+    "movie_index = SearchIndex(movie_schema, redis_client=client)\n",
+    "movie_index.create(overwrite=True, drop=True)\n",
+    "\n",
+    "user_schema = IndexSchema.from_yaml(\"user_schema.yaml\")\n",
+    "\n",
+    "user_index = SearchIndex(user_schema, redis_client=client)\n",
+    "user_index.create(overwrite=True, drop=True)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 287,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "keys = movie_index.load(movies_df.to_dict(orient='records'))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 288,
    "metadata": {},
    "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "number of movies 8387\n",
+      "size of movie df 8387\n",
+      "unique movie ids 8381\n",
+      "unique movie titles 8150\n",
+      "unique movies rated 9065\n"
+     ]
+    },
     {
      "data": {
       "text/html": [
@@ -678,6 +771,7 @@
        "      <th>movieId</th>\n",
        "      <th>imdbId</th>\n",
        "      <th>tmdbId</th>\n",
+       "      <th>movie_vector</th>\n",
        "    </tr>\n",
        "  </thead>\n",
        "  <tbody>\n",
@@ -701,6 +795,7 @@
        "      <td>1</td>\n",
        "      <td>114709</td>\n",
        "      <td>862.0</td>\n",
+       "      <td>[-0.023792249725276562, 0.1785839516922377, -0...</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>1</th>\n",
@@ -722,6 +817,7 @@
        "      <td>2</td>\n",
        "      <td>113497</td>\n",
        "      <td>8844.0</td>\n",
+       "      <td>[0.2793838607565979, -0.21744939596620874, 0.1...</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>2</th>\n",
@@ -743,6 +839,7 @@
        "      <td>3</td>\n",
        "      <td>113228</td>\n",
        "      <td>15602.0</td>\n",
+       "      <td>[-0.020947681442077554, 0.20694515937091487, 0...</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>3</th>\n",
@@ -764,6 +861,7 @@
        "      <td>4</td>\n",
        "      <td>114885</td>\n",
        "      <td>31357.0</td>\n",
+       "      <td>[0.04080238290985722, 0.07032878736373183, -0....</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>4</th>\n",
@@ -785,6 +883,7 @@
        "      <td>5</td>\n",
        "      <td>113041</td>\n",
        "      <td>11862.0</td>\n",
+       "      <td>[-0.004196795084205664, -0.04584846941882623, ...</td>\n",
        "    </tr>\n",
        "  </tbody>\n",
        "</table>\n",
@@ -826,124 +925,36 @@
        "3            Waiting to Exhale           6.1          34        4  114885   \n",
        "4  Father of the Bride Part II           5.7         173        5  113041   \n",
        "\n",
-       "    tmdbId  \n",
-       "0    862.0  \n",
-       "1   8844.0  \n",
-       "2  15602.0  \n",
-       "3  31357.0  \n",
-       "4  11862.0  "
+       "    tmdbId                                       movie_vector  \n",
+       "0    862.0  [-0.023792249725276562, 0.1785839516922377, -0...  \n",
+       "1   8844.0  [0.2793838607565979, -0.21744939596620874, 0.1...  \n",
+       "2  15602.0  [-0.020947681442077554, 0.20694515937091487, 0...  \n",
+       "3  31357.0  [0.04080238290985722, 0.07032878736373183, -0....  \n",
+       "4  11862.0  [-0.004196795084205664, -0.04584846941882623, ...  "
       ]
      },
-     "execution_count": 12,
+     "execution_count": 288,
      "metadata": {},
      "output_type": "execute_result"
     }
    ],
    "source": [
-    "\n",
-    "links_df = fetch_dataframe('links_small.csv')\n",
-    "movies_df = movies_df.merge(links_df, left_on='imdb_id', right_on='imdbId', how='inner')\n",
-    "movies_df.head()"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "We'll want to move our SVD user vectors and movie vectors and their corresponding userId and movieId into 2 dataframes for later processing."
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 13,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# build a dataframe out of the user vectors and their userIds\n",
-    "user_vectors_and_ids = {train_set.to_raw_uid(inner_id): user_vectors[inner_id].tolist() for inner_id in train_set.all_users()}\n",
-    "user_vector_df = pd.Series(user_vectors_and_ids).to_frame('user_vector')\n",
-    "\n",
-    "# now do the same for the movie vectors and their movieIds\n",
-    "movie_vectors_and_ids = {train_set.to_raw_iid(inner_id): movie_vectors[inner_id].tolist() for inner_id in train_set.all_items()}\n",
-    "movie_vector_df = pd.Series(movie_vectors_and_ids).to_frame('movie_vector')\n",
-    "\n",
-    "# merge the movie vector series with the movies dataframe using the movieId and id fields\n",
-    "movies_df = movies_df.merge(movie_vector_df, left_on='id', right_index=True, how='inner')"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "## RedisVL Handles the Scale\n",
-    "\n",
-    "Especially for large datasets like the 45,000 movie catalog we're dealing with, you'll want Redis to do the heavy lifting of vector search.\n",
-    "All that's needed is to define the search index and load our data we've cleaned and merged with our vectors.\n"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 14,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "from redis import Redis\n",
-    "from redisvl.schema import IndexSchema\n",
-    "from redisvl.index import SearchIndex\n",
-    "\n",
-    "client = Redis.from_url(REDIS_URL)\n",
-    "\n",
-    "movie_schema = IndexSchema.from_yaml(\"collaborative_filtering_schema.yaml\")\n",
-    "\n",
-    "movie_index = SearchIndex(movie_schema, redis_client=client)\n",
-    "movie_index.create(overwrite=True, drop=True)\n",
-    "\n",
-    "user_schema = IndexSchema.from_yaml(\"user_schema.yaml\")\n",
-    "\n",
-    "user_index = SearchIndex(user_schema, redis_client=client)\n",
-    "user_index.create(overwrite=True, drop=True)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 15,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "keys = movie_index.load(movies_df.to_dict(orient='records'))"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 16,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "1494\n",
-      "28386\n",
-      "1494\n",
-      "1482\n",
-      "9065\n"
-     ]
-    }
-   ],
-   "source": [
-    "# sanity check I merged all my dataframes properly and have the right sizes of moives, users, vectors, ids, etc.\n",
+    "# sanity check that we merged all our dataframes properly and have the right sizes of movies, users, vectors, ids, etc.\n",
     "number_of_movies = len(movies_df.to_dict(orient='records'))\n",
-    "size_of_movie_df = movies_df.size\n",
+    "size_of_movie_df = movies_df.shape[0]\n",
+    "\n",
+    "print('number of movies', number_of_movies)\n",
+    "print('size of movie df', size_of_movie_df)\n",
     "\n",
-    "print(number_of_movies)\n",
-    "print(size_of_movie_df)\n",
     "unique_movie_ids = movies_df['id'].nunique()\n",
-    "print(unique_movie_ids)\n",
+    "print('unique movie ids', unique_movie_ids)\n",
+    "\n",
     "unique_movie_titles = movies_df['title'].nunique()\n",
-    "print(unique_movie_titles)\n",
+    "print('unique movie titles', unique_movie_titles)\n",
     "\n",
     "unique_movies_rated = ratings_df['movieId'].nunique()\n",
-    "print(unique_movies_rated)"
+    "print('unique movies rated', unique_movies_rated)\n",
+    "movies_df.head()"
    ]
   },
   {
@@ -954,38 +965,38 @@
     "\n",
     "This is why in our `collaborative_filtering_schema.yaml` we use `ip` (inner product) as our distance metric.\n",
     "\n",
-    "It's also why we'll use our user vector as the query vector when we do a vector query."
+    "It's also why we'll use our user vector as the query vector when we do a query."
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 26,
+   "execution_count": 289,
    "metadata": {},
    "outputs": [
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "{'id': 'movie:9df0babc731549909e929885973aee58', 'vector_distance': '-3.6087179184', 'title': 'The Million Dollar Hotel', 'genres': '[\"Drama\",\"Thriller\"]'}\n",
-      "{'id': 'movie:ad9142a30fd045cfbb1e5ae0b051f4c8', 'vector_distance': '-3.56296348572', 'title': 'Terminator 3: Rise of the Machines', 'genres': '[\"Action\",\"Thriller\",\"Science Fiction\"]'}\n",
-      "{'id': 'movie:bf3354ae719b44c782876272c5f95ce1', 'vector_distance': '-3.52630567551', 'title': 'Beverly Hills Cop III', 'genres': '[\"Action\",\"Comedy\",\"Crime\"]'}\n",
-      "{'id': 'movie:88ce7f7738104c539b004135f306e9ec', 'vector_distance': '-3.46648168564', 'title': 'Backdraft', 'genres': '[\"Action\",\"Crime\",\"Drama\",\"Mystery\",\"Thriller\"]'}\n",
-      "{'id': 'movie:9fe2e20d887a4263a540f3945f10751d', 'vector_distance': '-3.40900659561', 'title': 'Boogie Nights', 'genres': '[\"Drama\"]'}\n",
-      "{'id': 'movie:9cd57bacb5de437a88279a17c2161ce2', 'vector_distance': '-3.38699388504', 'title': 'The Good Thief', 'genres': '[\"Crime\",\"Drama\",\"Thriller\"]'}\n",
-      "{'id': 'movie:5d7079fac9534a0585608c9b0d01ba80', 'vector_distance': '-3.3247923851', 'title': \"Pandora's Box\", 'genres': '[\"Drama\",\"Thriller\",\"Romance\"]'}\n",
-      "{'id': 'movie:fa9cf76285c348078bb9814fa6b9357f', 'vector_distance': '-3.31738758087', 'title': 'Dawn of the Dead', 'genres': '[\"Horror\"]'}\n",
-      "{'id': 'movie:0c27eb9238744640a318c57104df4ddb', 'vector_distance': '-3.309486866', 'title': 'Once Were Warriors', 'genres': '[\"Drama\"]'}\n",
-      "{'id': 'movie:f490c86a71164bf3956d92be5de8ba05', 'vector_distance': '-3.30277919769', 'title': 'Light of Day', 'genres': '[\"Music\",\"Drama\"]'}\n",
-      "{'id': 'movie:e8ef474819814eaea6cb757449d3eded', 'vector_distance': '-3.28701210022', 'title': 'Beetlejuice', 'genres': '[\"Fantasy\",\"Comedy\"]'}\n",
-      "{'id': 'movie:0c596d8911e0498a854b4e6d5faae545', 'vector_distance': '-3.27935218811', 'title': 'Enough', 'genres': '[\"Drama\",\"Thriller\"]'}\n",
-      "{'id': 'movie:d0d08cb2caa44b42a3f21af5687fb7dc', 'vector_distance': '-3.26998329163', 'title': 'Ghost Rider', 'genres': '[\"Thriller\",\"Action\",\"Fantasy\",\"Horror\"]'}\n",
-      "{'id': 'movie:b5bcd6dba0474b709ccb3b10c3e2fb14', 'vector_distance': '-3.26271867752', 'title': 'Cousin, Cousine', 'genres': '[\"Romance\",\"Comedy\"]'}\n",
-      "{'id': 'movie:5726b3476a2d450db8792217298b7b57', 'vector_distance': '-3.25473356247', 'title': \"We're No Angels\", 'genres': '[\"Comedy\",\"Crime\",\"Drama\"]'}\n",
-      "{'id': 'movie:f1459b204f054f8daa7bb03e349d4bc1', 'vector_distance': '-3.25318956375', 'title': 'Gremlins 2: The New Batch', 'genres': '[\"Comedy\",\"Horror\",\"Fantasy\"]'}\n",
-      "{'id': 'movie:1cb9f1fb5d3f45a3861607aca03dfd4d', 'vector_distance': '-3.20173215866', 'title': 'Sleepless in Seattle', 'genres': '[\"Comedy\",\"Drama\",\"Romance\"]'}\n",
-      "{'id': 'movie:a567595136ef488b90637b77afb10664', 'vector_distance': '-3.19421386719', 'title': 'Point Break', 'genres': '[\"Action\",\"Thriller\",\"Crime\"]'}\n",
-      "{'id': 'movie:776e7891798048629d2dfa532ace8ff5', 'vector_distance': '-3.1903834343', 'title': \"My Best Friend's Wedding\", 'genres': '[\"Comedy\",\"Romance\"]'}\n",
-      "{'id': 'movie:6bcc04c814d24da6926161c9e0c10a76', 'vector_distance': '-3.17692661285', 'title': 'Cool Hand Luke', 'genres': '[\"Crime\",\"Drama\"]'}\n"
+      "{'id': 'movie:ea1eb7855f474e1190fc997697717bce', 'vector_distance': '-3.16834640503', 'title': 'Terminator 2: Judgment Day', 'genres': '[\"Action\",\"Thriller\",\"Science Fiction\"]'}\n",
+      "{'id': 'movie:882a7c1429d24473abb22ad8a7294a74', 'vector_distance': '-3.07514286041', 'title': 'Pearl Harbor', 'genres': '[\"History\",\"Romance\",\"War\"]'}\n",
+      "{'id': 'movie:df31a1ffc0a3432fb1d6952c74a14bdc', 'vector_distance': '-3.04155731201', 'title': 'Girl, Interrupted', 'genres': '[\"Drama\"]'}\n",
+      "{'id': 'movie:ea1778357c934222a84099ab488503e7', 'vector_distance': '-2.99355602264', 'title': 'Cruel Intentions', 'genres': '[\"Drama\",\"Romance\",\"Thriller\"]'}\n",
+      "{'id': 'movie:df965ebd8dfa482dbcf184e9a4234f0a', 'vector_distance': '-2.97833967209', 'title': 'Remember the Titans', 'genres': '[\"Drama\"]'}\n",
+      "{'id': 'movie:5bd9e34ff7394e82b2508087543008be', 'vector_distance': '-2.91204404831', 'title': 'The Quiet American', 'genres': '[\"Drama\",\"Action\",\"Thriller\",\"Romance\"]'}\n",
+      "{'id': 'movie:ca5dcbe4ce844f74989294c4fc3ed61f', 'vector_distance': '-2.90972471237', 'title': 'The Departed', 'genres': '[\"Drama\",\"Thriller\",\"Crime\"]'}\n",
+      "{'id': 'movie:5efbb2346520455aaef3ae51fb9de029', 'vector_distance': '-2.9093708992', 'title': 'Run Lola Run', 'genres': '[\"Action\",\"Drama\",\"Thriller\"]'}\n",
+      "{'id': 'movie:3b51e695ae084eb0ace87c2f48235fe8', 'vector_distance': '-2.89018774033', 'title': 'Gladiator', 'genres': '[\"Action\",\"Drama\",\"Adventure\"]'}\n",
+      "{'id': 'movie:207756fd3d2f4e5599b8d149bf5c6b59', 'vector_distance': '-2.88189530373', 'title': 'Big Fish', 'genres': '[\"Adventure\",\"Fantasy\",\"Drama\"]'}\n",
+      "{'id': 'movie:1cb9cfa6294f48e8942becbe29af4765', 'vector_distance': '-2.87604212761', 'title': 'The Wrong Trousers', 'genres': '[\"Animation\",\"Comedy\",\"Family\"]'}\n",
+      "{'id': 'movie:ef35f7f4b72646b49e377e06f8dc7dbf', 'vector_distance': '-2.86843323708', 'title': 'The African Queen', 'genres': '[\"Adventure\",\"War\",\"Romance\"]'}\n",
+      "{'id': 'movie:c1f1b760faed43f09100a725b7420510', 'vector_distance': '-2.8342666626', 'title': 'Crumb', 'genres': '[\"Documentary\"]'}\n",
+      "{'id': 'movie:cdf6b6911dcd4378a69fa9ce2979b7a6', 'vector_distance': '-2.83272624016', 'title': 'Heathers', 'genres': '[\"Thriller\",\"Comedy\",\"Drama\"]'}\n",
+      "{'id': 'movie:5569250c1da8423ba4644c899a0523b5', 'vector_distance': '-2.83206033707', 'title': 'Murder in the First', 'genres': '[\"Crime\",\"Drama\"]'}\n",
+      "{'id': 'movie:a297851d857242f4b7cd8679c8d2c2f9', 'vector_distance': '-2.82705926895', 'title': 'Angels and Insects', 'genres': '[\"Drama\",\"Romance\"]'}\n",
+      "{'id': 'movie:ff792c59ab7045038f84e1ceefba1652', 'vector_distance': '-2.81982970238', 'title': 'Beauty and the Beast', 'genres': '[\"Romance\",\"Family\",\"Animation\",\"Fantasy\",\"Music\"]'}\n",
+      "{'id': 'movie:e6a67cfe875e452c8f0239f5d484fe08', 'vector_distance': '-2.81556129456', 'title': 'Gladiator 1992', 'genres': '[\"Action\",\"Drama\"]'}\n",
+      "{'id': 'movie:9f7d1fbd89a8477db90147d382d7f53c', 'vector_distance': '-2.80134963989', 'title': 'Band of Brothers', 'genres': '[\"Action\",\"Drama\",\"War\"]'}\n",
+      "{'id': 'movie:bb9113c4eb0a4e798eea4ccbb3cdf448', 'vector_distance': '-2.79896092415', 'title': 'Life Is Beautiful', 'genres': '[\"Comedy\",\"Drama\"]'}\n"
      ]
     }
    ],
@@ -998,11 +1009,11 @@
     "# the distance metric 'ip' inner product is computing \"score = 1 - u * v\" and returning the minimum, which corresponds to the max of \"u * v\"\n",
     "# this is what we want. The predicted rating on a scale of 0 to 5 is then -(score - 1) == -score + 1\n",
     "query = RangeQuery(vector=user_vector,\n",
-    "                   vector_field_name='movie_vector',\n",
-    "                  num_results=20,\n",
-    "                  return_score=True,\n",
-    "                  return_fields=['title', 'genres']\n",
-    "                  )\n",
+    "                    vector_field_name='movie_vector',\n",
+    "                    num_results=20,\n",
+    "                    return_score=True,\n",
+    "                    return_fields=['title', 'genres']\n",
+    "                    )\n",
     "\n",
     "results = movie_index.query(query)\n",
     "\n",
@@ -1012,7 +1023,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 3,
+   "execution_count": 290,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -1024,7 +1035,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 291,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -1033,12 +1044,23 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 4,
+   "execution_count": 292,
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Deleted 4387 keys\n",
+      "Deleted 2000 keys\n",
+      "Deleted 1000 keys\n",
+      "Deleted 500 keys\n",
+      "Deleted 500 keys\n"
+     ]
+    }
+   ],
    "source": [
     "# clean up your index\n",
-    "\n",
     "while remaining := movie_index.clear():\n",
     "    print(f\"Deleted {remaining} keys\")"
    ]
diff --git a/python-recipes/recommendation-systems/user_schema.yaml b/python-recipes/recommendation-systems/user_schema.yaml
index e89bd6a0..6d5c9ebd 100644
--- a/python-recipes/recommendation-systems/user_schema.yaml
+++ b/python-recipes/recommendation-systems/user_schema.yaml
@@ -4,7 +4,7 @@ index:
     storage_type: json
 
 fields:
-    - name: usr_id
+    - name: user_id
       type: tag
     - name: ratings
       type: numeric

From ff02e4933715be559c4f7341c16cfcce360a6fff Mon Sep 17 00:00:00 2001
From: Justin Cechmanek <justin.cechmanek@redis.com>
Date: Sat, 12 Oct 2024 19:06:16 -0700
Subject: [PATCH 04/12] wip: adding bloom filtering to collaborative notebook

---
 .../collaborative_filtering.ipynb             | 506 +++++++++++++++---
 1 file changed, 430 insertions(+), 76 deletions(-)

diff --git a/python-recipes/recommendation-systems/collaborative_filtering.ipynb b/python-recipes/recommendation-systems/collaborative_filtering.ipynb
index 9977d2ef..01e97b33 100644
--- a/python-recipes/recommendation-systems/collaborative_filtering.ipynb
+++ b/python-recipes/recommendation-systems/collaborative_filtering.ipynb
@@ -27,7 +27,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 274,
+   "execution_count": 8,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -37,7 +37,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 275,
+   "execution_count": 9,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -69,7 +69,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 276,
+   "execution_count": 10,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -89,7 +89,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 277,
+   "execution_count": 11,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -119,7 +119,7 @@
     "A lot is going to happen in the code cell below. We split our full data into train and test sets. We defined the collaborative filtering algorithm to use, which in this case is the Singular Value Decomposition (SVD) algorithm. lastly, we fit our model to our data.\n",
     "\n",
     "It's worth going into more detail why we chose this algorithm and what it is computing in the `svd.fit(train_set)` method we're calling.\n",
-    "First, let's think about what data it's receiving - our ratings data. This only contains the user_ids, movie_ids, and the user's ratings of their watched movies on a scale of 1 to 5.\n",
+    "First, let's think about what data it's receiving - our ratings data. This only contains the userIds, movieIds, and the user's ratings of their watched movies on a scale of 1 to 5.\n",
     "\n",
     "We can put this data into a matrix with rows being users and columns being movies\n",
     "\n",
@@ -176,16 +176,16 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 278,
+   "execution_count": 12,
    "metadata": {},
    "outputs": [
     {
      "data": {
       "text/plain": [
-       "<surprise.prediction_algorithms.matrix_factorization.SVD at 0x33873df10>"
+       "<surprise.prediction_algorithms.matrix_factorization.SVD at 0x16cb1db50>"
       ]
      },
-     "execution_count": 278,
+     "execution_count": 12,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -224,7 +224,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 279,
+   "execution_count": 13,
    "metadata": {},
    "outputs": [
     {
@@ -232,7 +232,7 @@
      "output_type": "stream",
      "text": [
       "we have 671 users with feature vectors of size 100\n",
-      "we have 8435 movies with feature vectors of size 100\n"
+      "we have 8398 movies with feature vectors of size 100\n"
      ]
     }
    ],
@@ -258,21 +258,21 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 280,
+   "execution_count": 14,
    "metadata": {},
    "outputs": [
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "the predicted rating of user 347 on movie 5515 is 1.83012299501901\n"
+      "the predicted rating of user 347 on movie 5515 is 1.2662407571780765\n"
      ]
     }
    ],
    "source": [
     "# surprise casts userId and movieId to inner ids, so we have to use their mapping to now which rows to use\n",
-    "inner_uid = train_set.to_inner_uid(347) # user_id\n",
-    "inner_iid = train_set.to_inner_iid(5515) # movie_id\n",
+    "inner_uid = train_set.to_inner_uid(347) # userId\n",
+    "inner_iid = train_set.to_inner_iid(5515) # movieId\n",
     "\n",
     "# predict one user's rating of one film\n",
     "predicted_rating = np.dot(user_vectors[inner_uid], movie_vectors[inner_iid])\n",
@@ -281,15 +281,15 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 281,
+   "execution_count": 15,
    "metadata": {},
    "outputs": [
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "user: 347        item: 5515       r_ui = None   est = 1.83   {'was_impossible': False}\n",
-      "1.83012299501901\n"
+      "user: 347        item: 5515       r_ui = None   est = 1.27   {'was_impossible': False}\n",
+      "1.2662407571780765\n"
      ]
     }
    ],
@@ -314,7 +314,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 282,
+   "execution_count": 16,
    "metadata": {},
    "outputs": [
     {
@@ -554,7 +554,7 @@
        "[5 rows x 23 columns]"
       ]
      },
-     "execution_count": 282,
+     "execution_count": 16,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -566,7 +566,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 283,
+   "execution_count": 17,
    "metadata": {},
    "outputs": [
     {
@@ -590,7 +590,7 @@
        "dtype: int64"
       ]
      },
-     "execution_count": 283,
+     "execution_count": 17,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -631,7 +631,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 284,
+   "execution_count": 18,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -649,7 +649,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 285,
+   "execution_count": 19,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -677,15 +677,15 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 286,
+   "execution_count": 20,
    "metadata": {},
    "outputs": [
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "20:56:23 redisvl.index.index INFO   Index already exists, overwriting.\n",
-      "20:56:23 redisvl.index.index INFO   Index already exists, overwriting.\n"
+      "18:57:45 redisvl.index.index INFO   Index already exists, overwriting.\n",
+      "18:57:45 redisvl.index.index INFO   Index already exists, overwriting.\n"
      ]
     }
    ],
@@ -709,7 +709,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 287,
+   "execution_count": 21,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -718,17 +718,17 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 288,
+   "execution_count": 22,
    "metadata": {},
    "outputs": [
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "number of movies 8387\n",
-      "size of movie df 8387\n",
-      "unique movie ids 8381\n",
-      "unique movie titles 8150\n",
+      "number of movies 8351\n",
+      "size of movie df 8351\n",
+      "unique movie ids 8347\n",
+      "unique movie titles 8104\n",
       "unique movies rated 9065\n"
      ]
     },
@@ -795,7 +795,7 @@
        "      <td>1</td>\n",
        "      <td>114709</td>\n",
        "      <td>862.0</td>\n",
-       "      <td>[-0.023792249725276562, 0.1785839516922377, -0...</td>\n",
+       "      <td>[0.003070617914363312, -0.2183623175004815, -0...</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>1</th>\n",
@@ -817,7 +817,7 @@
        "      <td>2</td>\n",
        "      <td>113497</td>\n",
        "      <td>8844.0</td>\n",
-       "      <td>[0.2793838607565979, -0.21744939596620874, 0.1...</td>\n",
+       "      <td>[0.013404150790652358, -0.1920666231028718, -0...</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>2</th>\n",
@@ -839,7 +839,7 @@
        "      <td>3</td>\n",
        "      <td>113228</td>\n",
        "      <td>15602.0</td>\n",
-       "      <td>[-0.020947681442077554, 0.20694515937091487, 0...</td>\n",
+       "      <td>[0.17041991275371088, -0.14362645391937717, -0...</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>3</th>\n",
@@ -861,7 +861,7 @@
        "      <td>4</td>\n",
        "      <td>114885</td>\n",
        "      <td>31357.0</td>\n",
-       "      <td>[0.04080238290985722, 0.07032878736373183, -0....</td>\n",
+       "      <td>[0.029246177676017816, -0.19591132539475606, -...</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>4</th>\n",
@@ -883,7 +883,7 @@
        "      <td>5</td>\n",
        "      <td>113041</td>\n",
        "      <td>11862.0</td>\n",
-       "      <td>[-0.004196795084205664, -0.04584846941882623, ...</td>\n",
+       "      <td>[-0.03755917677168938, -0.17405036529466641, 0...</td>\n",
        "    </tr>\n",
        "  </tbody>\n",
        "</table>\n",
@@ -926,14 +926,14 @@
        "4  Father of the Bride Part II           5.7         173        5  113041   \n",
        "\n",
        "    tmdbId                                       movie_vector  \n",
-       "0    862.0  [-0.023792249725276562, 0.1785839516922377, -0...  \n",
-       "1   8844.0  [0.2793838607565979, -0.21744939596620874, 0.1...  \n",
-       "2  15602.0  [-0.020947681442077554, 0.20694515937091487, 0...  \n",
-       "3  31357.0  [0.04080238290985722, 0.07032878736373183, -0....  \n",
-       "4  11862.0  [-0.004196795084205664, -0.04584846941882623, ...  "
+       "0    862.0  [0.003070617914363312, -0.2183623175004815, -0...  \n",
+       "1   8844.0  [0.013404150790652358, -0.1920666231028718, -0...  \n",
+       "2  15602.0  [0.17041991275371088, -0.14362645391937717, -0...  \n",
+       "3  31357.0  [0.029246177676017816, -0.19591132539475606, -...  \n",
+       "4  11862.0  [-0.03755917677168938, -0.17405036529466641, 0...  "
       ]
      },
-     "execution_count": 288,
+     "execution_count": 22,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -965,44 +965,43 @@
     "\n",
     "This is why in our `collaborative_filtering_schema.yaml` we use `ip` (inner product) as our distance metric.\n",
     "\n",
-    "It's also why we'll use our user vector as the query vector when we do a query."
+    "It's also why we'll use our user vector as the query vector when we do a query. Let's pick a random user and their corresponding user vector to see what this looks like."
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 289,
+   "execution_count": 23,
    "metadata": {},
    "outputs": [
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "{'id': 'movie:ea1eb7855f474e1190fc997697717bce', 'vector_distance': '-3.16834640503', 'title': 'Terminator 2: Judgment Day', 'genres': '[\"Action\",\"Thriller\",\"Science Fiction\"]'}\n",
-      "{'id': 'movie:882a7c1429d24473abb22ad8a7294a74', 'vector_distance': '-3.07514286041', 'title': 'Pearl Harbor', 'genres': '[\"History\",\"Romance\",\"War\"]'}\n",
-      "{'id': 'movie:df31a1ffc0a3432fb1d6952c74a14bdc', 'vector_distance': '-3.04155731201', 'title': 'Girl, Interrupted', 'genres': '[\"Drama\"]'}\n",
-      "{'id': 'movie:ea1778357c934222a84099ab488503e7', 'vector_distance': '-2.99355602264', 'title': 'Cruel Intentions', 'genres': '[\"Drama\",\"Romance\",\"Thriller\"]'}\n",
-      "{'id': 'movie:df965ebd8dfa482dbcf184e9a4234f0a', 'vector_distance': '-2.97833967209', 'title': 'Remember the Titans', 'genres': '[\"Drama\"]'}\n",
-      "{'id': 'movie:5bd9e34ff7394e82b2508087543008be', 'vector_distance': '-2.91204404831', 'title': 'The Quiet American', 'genres': '[\"Drama\",\"Action\",\"Thriller\",\"Romance\"]'}\n",
-      "{'id': 'movie:ca5dcbe4ce844f74989294c4fc3ed61f', 'vector_distance': '-2.90972471237', 'title': 'The Departed', 'genres': '[\"Drama\",\"Thriller\",\"Crime\"]'}\n",
-      "{'id': 'movie:5efbb2346520455aaef3ae51fb9de029', 'vector_distance': '-2.9093708992', 'title': 'Run Lola Run', 'genres': '[\"Action\",\"Drama\",\"Thriller\"]'}\n",
-      "{'id': 'movie:3b51e695ae084eb0ace87c2f48235fe8', 'vector_distance': '-2.89018774033', 'title': 'Gladiator', 'genres': '[\"Action\",\"Drama\",\"Adventure\"]'}\n",
-      "{'id': 'movie:207756fd3d2f4e5599b8d149bf5c6b59', 'vector_distance': '-2.88189530373', 'title': 'Big Fish', 'genres': '[\"Adventure\",\"Fantasy\",\"Drama\"]'}\n",
-      "{'id': 'movie:1cb9cfa6294f48e8942becbe29af4765', 'vector_distance': '-2.87604212761', 'title': 'The Wrong Trousers', 'genres': '[\"Animation\",\"Comedy\",\"Family\"]'}\n",
-      "{'id': 'movie:ef35f7f4b72646b49e377e06f8dc7dbf', 'vector_distance': '-2.86843323708', 'title': 'The African Queen', 'genres': '[\"Adventure\",\"War\",\"Romance\"]'}\n",
-      "{'id': 'movie:c1f1b760faed43f09100a725b7420510', 'vector_distance': '-2.8342666626', 'title': 'Crumb', 'genres': '[\"Documentary\"]'}\n",
-      "{'id': 'movie:cdf6b6911dcd4378a69fa9ce2979b7a6', 'vector_distance': '-2.83272624016', 'title': 'Heathers', 'genres': '[\"Thriller\",\"Comedy\",\"Drama\"]'}\n",
-      "{'id': 'movie:5569250c1da8423ba4644c899a0523b5', 'vector_distance': '-2.83206033707', 'title': 'Murder in the First', 'genres': '[\"Crime\",\"Drama\"]'}\n",
-      "{'id': 'movie:a297851d857242f4b7cd8679c8d2c2f9', 'vector_distance': '-2.82705926895', 'title': 'Angels and Insects', 'genres': '[\"Drama\",\"Romance\"]'}\n",
-      "{'id': 'movie:ff792c59ab7045038f84e1ceefba1652', 'vector_distance': '-2.81982970238', 'title': 'Beauty and the Beast', 'genres': '[\"Romance\",\"Family\",\"Animation\",\"Fantasy\",\"Music\"]'}\n",
-      "{'id': 'movie:e6a67cfe875e452c8f0239f5d484fe08', 'vector_distance': '-2.81556129456', 'title': 'Gladiator 1992', 'genres': '[\"Action\",\"Drama\"]'}\n",
-      "{'id': 'movie:9f7d1fbd89a8477db90147d382d7f53c', 'vector_distance': '-2.80134963989', 'title': 'Band of Brothers', 'genres': '[\"Action\",\"Drama\",\"War\"]'}\n",
-      "{'id': 'movie:bb9113c4eb0a4e798eea4ccbb3cdf448', 'vector_distance': '-2.79896092415', 'title': 'Life Is Beautiful', 'genres': '[\"Comedy\",\"Drama\"]'}\n"
+      "{'id': 'movie:9a77231d27154ea1a678907d8e2c31ee', 'vector_distance': '-3.15922021866', 'title': 'Forrest Gump', 'genres': '[\"Comedy\",\"Drama\",\"Romance\"]'}\n",
+      "{'id': 'movie:51a38fb8f13f4726a8019a8d66f2b05b', 'vector_distance': '-3.15213918686', 'title': 'Cool Hand Luke', 'genres': '[\"Crime\",\"Drama\"]'}\n",
+      "{'id': 'movie:2e487266815945b0b8b857a848292e3e', 'vector_distance': '-3.07703495026', 'title': 'The Shawshank Redemption', 'genres': '[\"Drama\",\"Crime\"]'}\n",
+      "{'id': 'movie:c048199f45d340e282e37ff1c54089ca', 'vector_distance': '-3.04389858246', 'title': 'Lock, Stock and Two Smoking Barrels', 'genres': '[\"Comedy\",\"Crime\"]'}\n",
+      "{'id': 'movie:0996165a33924a79beb757bcefc17ed3', 'vector_distance': '-3.03677082062', 'title': 'Return of the Jedi', 'genres': '[\"Adventure\",\"Action\",\"Science Fiction\"]'}\n",
+      "{'id': 'movie:00e1ad78f47745b6a0e41b101eb98ff0', 'vector_distance': '-3.01881790161', 'title': 'In the Line of Fire', 'genres': '[\"Action\",\"Drama\",\"Thriller\",\"Crime\",\"Mystery\"]'}\n",
+      "{'id': 'movie:8fd381ca8404447caf53b63a901c69eb', 'vector_distance': '-3.01792764664', 'title': \"Mr. Holland's Opus\", 'genres': '[\"Music\",\"Drama\",\"Family\"]'}\n",
+      "{'id': 'movie:139f8da6282541a3a5881c7c457dd5ea', 'vector_distance': '-3.00384759903', 'title': 'Lifeboat', 'genres': '[\"Drama\",\"War\"]'}\n",
+      "{'id': 'movie:600e4f66decf454587888263b3fd7c6c', 'vector_distance': '-3.00213766098', 'title': 'Fargo', 'genres': '[\"Crime\",\"Drama\",\"Thriller\"]'}\n",
+      "{'id': 'movie:657859724d944786afa80a74a8ba7590', 'vector_distance': '-3.00026941299', 'title': 'Crumb', 'genres': '[\"Documentary\"]'}\n",
+      "{'id': 'movie:a8a6ce2be2e547d7be27cfcd3c4b412e', 'vector_distance': '-2.98649430275', 'title': 'Much Ado About Nothing', 'genres': '[\"Drama\",\"Comedy\",\"Romance\"]'}\n",
+      "{'id': 'movie:b77ab676009448b8b31f05cfad877d78', 'vector_distance': '-2.96709799767', 'title': 'Dead Man Walking', 'genres': '[\"Drama\"]'}\n",
+      "{'id': 'movie:203d662a71db49b7afc86e101ed3b61c', 'vector_distance': '-2.96438765526', 'title': 'Star Wars', 'genres': '[\"Adventure\",\"Action\",\"Science Fiction\"]'}\n",
+      "{'id': 'movie:4355e3c0948943db8b9c1774e3c616a2', 'vector_distance': '-2.94901204109', 'title': 'The Lord of the Rings: The Fellowship of the Ring', 'genres': '[\"Adventure\",\"Fantasy\",\"Action\"]'}\n",
+      "{'id': 'movie:81dc7920656a4d389767fd13c2cb24b3', 'vector_distance': '-2.92887830734', 'title': 'Sunset Boulevard', 'genres': '[\"Drama\"]'}\n",
+      "{'id': 'movie:42d61bf4d1fc42a498fdb2985cd402dd', 'vector_distance': '-2.92803192139', 'title': 'Cinema Paradiso', 'genres': '[\"Drama\",\"Romance\"]'}\n",
+      "{'id': 'movie:0673b1bb33a144b78397fff8939ec758', 'vector_distance': '-2.92664003372', 'title': 'Eat Drink Man Woman', 'genres': '[\"Comedy\",\"Drama\",\"Romance\"]'}\n",
+      "{'id': 'movie:8bbac427a25643d48a0582a411f2ce71', 'vector_distance': '-2.92609977722', 'title': \"Ocean's Eleven\", 'genres': '[\"Thriller\",\"Crime\"]'}\n",
+      "{'id': 'movie:ead41675cf414eca86c3e995f398b09e', 'vector_distance': '-2.92416906357', 'title': 'Three Colors: Red', 'genres': '[\"Drama\",\"Mystery\",\"Romance\"]'}\n",
+      "{'id': 'movie:9843a33a6dd44f46ad475808cebb06fe', 'vector_distance': '-2.91586065292', 'title': 'Ponyo', 'genres': '[\"Animation\",\"Family\"]'}\n"
      ]
     }
    ],
    "source": [
-    "from redisvl.query import RangeQuery, FilterQuery\n",
-    "from redisvl.query.filter import Tag, Num, Text\n",
+    "from redisvl.query import RangeQuery\n",
     "\n",
     "user_vector = user_vectors[352].tolist()\n",
     "\n",
@@ -1021,49 +1020,404 @@
     "    print(r)"
    ]
   },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Adding All the Bells & Whistles\n",
+    "Vector search handles the bulk of our collaborative filtering recommendation system and is a great approach to generating personalized recommendations that are unique to each user.\n",
+    "\n",
+    "To up our RecSys game even further, we can leverage RedisVL filter logic to give more control over what users are shown. Why have only one feed of recommended movies when you can have several, each with its own theme and personalized to each user?"
+   ]
+  },
   {
    "cell_type": "code",
-   "execution_count": 290,
+   "execution_count": 24,
    "metadata": {},
    "outputs": [],
    "source": [
     "\n",
-    "## EVALUATE MOVE TO COLLABORATIVE FILTERING SO WE CAN SHOW BETTER NUMBERS\n",
-    "#let's see how well this works. we can choose some users, and based on their first watched movie we can recommend them some more.\n",
-    "#we can then look at the set intersection between our recommendations and the movies they actually watched (and rated highly) to see how well we did."
+    "from redisvl.query.filter import Tag, Num, Text\n",
+    "\n",
+    "def get_recommendations(user_id, filters=None, num_results=10):\n",
+    "    user_vector = user_vectors[user_id].tolist()\n",
+    "    query = RangeQuery(vector=user_vector,\n",
+    "                       vector_field_name='movie_vector',\n",
+    "                       num_results=num_results,\n",
+    "                       filter_expression=filters,\n",
+    "                       return_fields=['title', 'overview', 'genres'])\n",
+    "\n",
+    "    results = movie_index.query(query)\n",
+    "\n",
+    "    return [(r['title'], r['overview'], r['genres'], r['vector_distance']) for r in results]\n",
+    "\n",
+    "Top_picks_for_you = get_recommendations(user_id=42) # general SVD results, no filter\n",
+    "\n",
+    "block_buster_filter = Num('revenue') > 30_000_000\n",
+    "block_buster_hits = get_recommendations(user_id=42, filters=block_buster_filter)\n",
+    "\n",
+    "classics_filter = Num('release_date') < datetime.datetime(1990, 1, 1).timestamp()\n",
+    "classics = get_recommendations(user_id=42, filters=classics_filter)\n",
+    "\n",
+    "popular_filter = (Num('popularity') > 50) & (Num('vote_average') > 7)\n",
+    "Whats_popular = get_recommendations(user_id=42, filters=popular_filter)\n",
+    "\n",
+    "indie_filter = (Num('revenue') < 1_000_000) & (Num('popularity') > 10)\n",
+    "indie_hits = get_recommendations(user_id=42, filters=indie_filter)\n",
+    "\n",
+    "fruity = Text('title') % 'apple|orange|peach|banana|grape|pineapple'\n",
+    "fruity_films = get_recommendations(user_id=42, filters=fruity)\n"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 291,
+   "execution_count": 25,
    "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>top picks</th>\n",
+       "      <th>block busters</th>\n",
+       "      <th>classics</th>\n",
+       "      <th>what's popular</th>\n",
+       "      <th>indie hits</th>\n",
+       "      <th>fruity films</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>The Graduate</td>\n",
+       "      <td>The Graduate</td>\n",
+       "      <td>The Graduate</td>\n",
+       "      <td>Pulp Fiction</td>\n",
+       "      <td>All About Eve</td>\n",
+       "      <td>The Grapes of Wrath</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>Das Boot</td>\n",
+       "      <td>Das Boot</td>\n",
+       "      <td>Das Boot</td>\n",
+       "      <td>The Shawshank Redemption</td>\n",
+       "      <td>The Postman</td>\n",
+       "      <td>What's Eating Gilbert Grape</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>Amadeus</td>\n",
+       "      <td>Amadeus</td>\n",
+       "      <td>Amadeus</td>\n",
+       "      <td>Gone Girl</td>\n",
+       "      <td>Bicycle Thieves</td>\n",
+       "      <td>A Clockwork Orange</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>Fargo</td>\n",
+       "      <td>Fargo</td>\n",
+       "      <td>Dr. Strangelove or: How I Learned to Stop Worr...</td>\n",
+       "      <td>Dawn of the Planet of the Apes</td>\n",
+       "      <td>My Neighbor Totoro</td>\n",
+       "      <td>Bananas</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4</th>\n",
+       "      <td>Dr. Strangelove or: How I Learned to Stop Worr...</td>\n",
+       "      <td>Shakespeare in Love</td>\n",
+       "      <td>Cinema Paradiso</td>\n",
+       "      <td>Fight Club</td>\n",
+       "      <td>The Wild Bunch</td>\n",
+       "      <td>Pineapple Express</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>5</th>\n",
+       "      <td>Cinema Paradiso</td>\n",
+       "      <td>The Last Emperor</td>\n",
+       "      <td>Take the Money and Run</td>\n",
+       "      <td>Blade Runner</td>\n",
+       "      <td>M</td>\n",
+       "      <td>James and the Giant Peach</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>6</th>\n",
+       "      <td>Take the Money and Run</td>\n",
+       "      <td>The Color Purple</td>\n",
+       "      <td>The Last Emperor</td>\n",
+       "      <td>Whiplash</td>\n",
+       "      <td>Rebel Without a Cause</td>\n",
+       "      <td>The Apple Dumpling Gang</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>7</th>\n",
+       "      <td>Shakespeare in Love</td>\n",
+       "      <td>Manhattan</td>\n",
+       "      <td>Raging Bull</td>\n",
+       "      <td>Big Hero 6</td>\n",
+       "      <td>Withnail &amp; I</td>\n",
+       "      <td>Adam's Apples</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>8</th>\n",
+       "      <td>The Last Emperor</td>\n",
+       "      <td>Annie Hall</td>\n",
+       "      <td>The Color Purple</td>\n",
+       "      <td>Guardians of the Galaxy</td>\n",
+       "      <td>Meet John Doe</td>\n",
+       "      <td>Orange County</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>9</th>\n",
+       "      <td>Raging Bull</td>\n",
+       "      <td>The Piano</td>\n",
+       "      <td>North by Northwest</td>\n",
+       "      <td>Captain America: Civil War</td>\n",
+       "      <td>Once Upon a Time in America</td>\n",
+       "      <td>Herbie Goes Bananas</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "                                           top picks        block busters  \\\n",
+       "0                                       The Graduate         The Graduate   \n",
+       "1                                           Das Boot             Das Boot   \n",
+       "2                                            Amadeus              Amadeus   \n",
+       "3                                              Fargo                Fargo   \n",
+       "4  Dr. Strangelove or: How I Learned to Stop Worr...  Shakespeare in Love   \n",
+       "5                                    Cinema Paradiso     The Last Emperor   \n",
+       "6                             Take the Money and Run     The Color Purple   \n",
+       "7                                Shakespeare in Love            Manhattan   \n",
+       "8                                   The Last Emperor           Annie Hall   \n",
+       "9                                        Raging Bull            The Piano   \n",
+       "\n",
+       "                                            classics  \\\n",
+       "0                                       The Graduate   \n",
+       "1                                           Das Boot   \n",
+       "2                                            Amadeus   \n",
+       "3  Dr. Strangelove or: How I Learned to Stop Worr...   \n",
+       "4                                    Cinema Paradiso   \n",
+       "5                             Take the Money and Run   \n",
+       "6                                   The Last Emperor   \n",
+       "7                                        Raging Bull   \n",
+       "8                                   The Color Purple   \n",
+       "9                                 North by Northwest   \n",
+       "\n",
+       "                   what's popular                   indie hits  \\\n",
+       "0                    Pulp Fiction                All About Eve   \n",
+       "1        The Shawshank Redemption                  The Postman   \n",
+       "2                       Gone Girl              Bicycle Thieves   \n",
+       "3  Dawn of the Planet of the Apes           My Neighbor Totoro   \n",
+       "4                      Fight Club               The Wild Bunch   \n",
+       "5                    Blade Runner                            M   \n",
+       "6                        Whiplash        Rebel Without a Cause   \n",
+       "7                      Big Hero 6                 Withnail & I   \n",
+       "8         Guardians of the Galaxy                Meet John Doe   \n",
+       "9      Captain America: Civil War  Once Upon a Time in America   \n",
+       "\n",
+       "                  fruity films  \n",
+       "0          The Grapes of Wrath  \n",
+       "1  What's Eating Gilbert Grape  \n",
+       "2           A Clockwork Orange  \n",
+       "3                      Bananas  \n",
+       "4            Pineapple Express  \n",
+       "5    James and the Giant Peach  \n",
+       "6      The Apple Dumpling Gang  \n",
+       "7                Adam's Apples  \n",
+       "8                Orange County  \n",
+       "9          Herbie Goes Bananas  "
+      ]
+     },
+     "execution_count": 25,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# put all these titles into a single pandas DataFrame, where each column is one category\n",
+    "all_recommendations = pd.DataFrame(columns=[\"top picks\", \"block busters\", \"classics\", \"what's popular\", \"indie hits\", \"fruity films\"])\n",
+    "all_recommendations[\"top picks\"] = [m[0] for m in Top_picks_for_you]\n",
+    "all_recommendations[\"block busters\"] = [m[0] for m in block_buster_hits]\n",
+    "all_recommendations[\"classics\"] = [m[0] for m in classics]\n",
+    "all_recommendations[\"what's popular\"] = [m[0] for m in Whats_popular]\n",
+    "all_recommendations[\"indie hits\"] = [m[0] for m in indie_hits]\n",
+    "all_recommendations[\"fruity films\"] = [m[0] for m in fruity_films]\n",
+    "\n",
+    "all_recommendations.head(10)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Keeping Things Fresh\n",
+    "You've probably noticed that a few movies get repeated in these lists. That's not surprising, as all our results are personalized and things like `popularity`, `user_rating`, and `revenue` are likely highly correlated. And it's more than likely that at least some of the recommendations we expect a given user to rate highly are movies they've already watched and rated highly.\n",
+    "\n",
+    "Luckily, Redis offers an easy answer to keeping recommendations new and interesting, and that answer is Bloom filters."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 34,
+   "metadata": {},
+   "outputs": [
+    {
+     "ename": "AttributeError",
+     "evalue": "'bool' object has no attribute 'add'",
+     "output_type": "error",
+     "traceback": [
+      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
+      "\u001b[0;31mAttributeError\u001b[0m                            Traceback (most recent call last)",
+      "Cell \u001b[0;32mIn[34], line 40\u001b[0m\n\u001b[1;32m     38\u001b[0m \u001b[38;5;28mfilter\u001b[39m \u001b[38;5;241m=\u001b[39m client\u001b[38;5;241m.\u001b[39mbf()\u001b[38;5;241m.\u001b[39mcreate(\u001b[38;5;124m'\u001b[39m\u001b[38;5;124muser_watched_list:\u001b[39m\u001b[38;5;132;01m{user_id}\u001b[39;00m\u001b[38;5;124m'\u001b[39m, \u001b[38;5;241m0.01\u001b[39m, \u001b[38;5;241m1000\u001b[39m)\n\u001b[1;32m     39\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m movie_id \u001b[38;5;129;01min\u001b[39;00m watched_movies:\n\u001b[0;32m---> 40\u001b[0m     \u001b[38;5;28;43mfilter\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43madd\u001b[49m(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;132;01m{\u001b[39;00muser_id\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m:\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mmovie_id\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m'\u001b[39m)\n\u001b[1;32m     42\u001b[0m Top_picks_for_you \u001b[38;5;241m=\u001b[39m get_unique_recommendations(user_id\u001b[38;5;241m=\u001b[39muser_id)  \u001b[38;5;66;03m# general SVD results, no filter\u001b[39;00m\n\u001b[1;32m     43\u001b[0m block_buster_hits \u001b[38;5;241m=\u001b[39m get_unique_recommendations(user_id\u001b[38;5;241m=\u001b[39muser_id, filters\u001b[38;5;241m=\u001b[39mblock_buster_filter)\n",
+      "\u001b[0;31mAttributeError\u001b[0m: 'bool' object has no attribute 'add'"
+     ]
+    }
+   ],
+   "source": [
+    "\n",
+    "# create a bloom filter for a given user and add their watched list to it\n",
+    "def create_bloom_filter(user_id, watched_movies):\n",
+    "    if not client.bf().exists(f\"user_watched_list\"):\n",
+    "        filter = client.bf().create(f\"user_watched_list\", 0.01, 1000)\n",
+    "    for movie_id in watched_movies:\n",
+    "        client.bf().add(f\"user_watched_list\", f\"{user_id}:{movie_id}\")\n",
+    "    return filter\n",
+    "\n",
+    "# rewrite the get_recommendations() function to use a bloom filter and apply it before we return results\n",
+    "def get_unique_recommendations(user_id, filters=None, num_results=10):\n",
+    "    user_vector = user_vectors[user_id].tolist()\n",
+    "    bloom_filter_name = f\"user:{user_id}:watched\"\n",
+    "\n",
+    "    query = RangeQuery(vector=user_vector,\n",
+    "                       vector_field_name='movie_vector',\n",
+    "                       num_results=num_results * 2,  # fetch more results to filter out watched movies\n",
+    "                       filter_expression=filters,\n",
+    "                       #return_fields=['title', 'overview', 'genres', 'movie_id'])\n",
+    "                       return_fields=['title',  'movieId'])\n",
+    "\n",
+    "    results = movie_index.query(query)\n",
+    "\n",
+    "    # filter out movies that the user has already watched\n",
+    "    recommendations = []\n",
+    "    for r in results:\n",
+    "        print(r)\n",
+    "        if not bloom_client.bfExists(bloom_filter_name, r['movieId']):\n",
+    "            recommendations.append((r['title'], r['overview'], r['genres'], r['vector_distance']))\n",
+    "        if len(recommendations) >= num_results:\n",
+    "            break\n",
+    "\n",
+    "    return recommendations\n",
+    "\n",
+    "# example usage\n",
+    "user_id = 42\n",
+    "watched_movies = ratings_df[ratings_df['userId'] == user_id]['movieId'].tolist()\n",
+    "\n",
+    "filter = client.bf().create('user_watched_list:{user_id}', 0.01, 1000)\n",
+    "for movie_id in watched_movies:\n",
+    "    filter.add(f'{user_id}:{movie_id}')\n",
+    "\n",
+    "Top_picks_for_you = get_unique_recommendations(user_id=user_id)  # general SVD results, no filter\n",
+    "block_buster_hits = get_unique_recommendations(user_id=user_id, filters=block_buster_filter)\n",
+    "classics = get_unique_recommendations(user_id=user_id, filters=classics_filter)\n",
+    "Whats_popular = get_unique_recommendations(user_id=user_id, filters=popular_filter)\n",
+    "indie_hits = get_unique_recommendations(user_id=user_id, filters=indie_filter)\n",
+    "fruity_films = get_unique_recommendations(user_id=user_id, filters=fruity)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "vscode": {
+     "languageId": "ruby"
+    }
+   },
    "outputs": [],
    "source": [
-    "# TODO use bloom filter and/or cuckoo filter with the recommendations and user's watched_list in their index to filter out movies they already watched"
+    "# put all these titles into a single pandas DataFrame, where each column is one category\n",
+    "all_recommendations = pd.DataFrame(columns=[\"top picks\", \"block busters\", \"classics\", \"what's popular\", \"indie hits\", \"fruity films\"])\n",
+    "all_recommendations[\"top picks\"] = [m[0] for m in Top_picks_for_you]\n",
+    "all_recommendations[\"block busters\"] = [m[0] for m in block_buster_hits]\n",
+    "all_recommendations[\"classics\"] = [m[0] for m in classics]\n",
+    "all_recommendations[\"what's popular\"] = [m[0] for m in Whats_popular]\n",
+    "all_recommendations[\"indie hits\"] = [m[0] for m in indie_hits]\n",
+    "all_recommendations[\"fruity films\"] = [m[0] for m in fruity_films]\n",
+    "\n",
+    "all_recommendations.head(10)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Conclusion\n",
+    "That's it! That's all it takes to build a highly scalable, personalized, customizable collaborative filtering recommendation system with Redis and RedisVL.\n"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 292,
+   "execution_count": 27,
    "metadata": {},
    "outputs": [
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Deleted 4387 keys\n",
+      "Deleted 4351 keys\n",
       "Deleted 2000 keys\n",
       "Deleted 1000 keys\n",
       "Deleted 500 keys\n",
       "Deleted 500 keys\n"
      ]
+    },
+    {
+     "data": {
+      "text/plain": [
+       "1"
+      ]
+     },
+     "execution_count": 27,
+     "metadata": {},
+     "output_type": "execute_result"
     }
    ],
    "source": [
     "# clean up your index\n",
     "while remaining := movie_index.clear():\n",
-    "    print(f\"Deleted {remaining} keys\")"
+    "    print(f\"Deleted {remaining} keys\")\n",
+    "\n",
+    "while remaining := user_index.clear():\n",
+    "    print(f\"Deleeted {remaining} keys\")\n",
+    "\n",
+    "client.delete(\"user_watched_list\")"
    ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
   }
  ],
  "metadata": {

From c17af34aa27fee38daa417695dbb165f473b9d61 Mon Sep 17 00:00:00 2001
From: Justin Cechmanek <justin.cechmanek@redis.com>
Date: Mon, 14 Oct 2024 12:33:10 -0700
Subject: [PATCH 05/12] fully working collaborative filtering with bloomfilter
 notebook

---
 .../collaborative_filtering.ipynb             | 553 +++++++++++-------
 1 file changed, 348 insertions(+), 205 deletions(-)

diff --git a/python-recipes/recommendation-systems/collaborative_filtering.ipynb b/python-recipes/recommendation-systems/collaborative_filtering.ipynb
index 01e97b33..99044c02 100644
--- a/python-recipes/recommendation-systems/collaborative_filtering.ipynb
+++ b/python-recipes/recommendation-systems/collaborative_filtering.ipynb
@@ -27,7 +27,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 8,
+   "execution_count": 1,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -37,7 +37,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 9,
+   "execution_count": 2,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -69,7 +69,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 10,
+   "execution_count": 3,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -89,7 +89,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 11,
+   "execution_count": 4,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -176,16 +176,16 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 12,
+   "execution_count": 5,
    "metadata": {},
    "outputs": [
     {
      "data": {
       "text/plain": [
-       "<surprise.prediction_algorithms.matrix_factorization.SVD at 0x16cb1db50>"
+       "<surprise.prediction_algorithms.matrix_factorization.SVD at 0x108501ed0>"
       ]
      },
-     "execution_count": 12,
+     "execution_count": 5,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -224,7 +224,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 13,
+   "execution_count": 6,
    "metadata": {},
    "outputs": [
     {
@@ -232,7 +232,7 @@
      "output_type": "stream",
      "text": [
       "we have 671 users with feature vectors of size 100\n",
-      "we have 8398 movies with feature vectors of size 100\n"
+      "we have 8393 movies with feature vectors of size 100\n"
      ]
     }
    ],
@@ -258,14 +258,14 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 14,
+   "execution_count": 7,
    "metadata": {},
    "outputs": [
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "the predicted rating of user 347 on movie 5515 is 1.2662407571780765\n"
+      "the predicted rating of user 347 on movie 5515 is 0.965787539953316\n"
      ]
     }
    ],
@@ -281,15 +281,15 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 15,
+   "execution_count": 8,
    "metadata": {},
    "outputs": [
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "user: 347        item: 5515       r_ui = None   est = 1.27   {'was_impossible': False}\n",
-      "1.2662407571780765\n"
+      "user: 347        item: 5515       r_ui = None   est = 0.97   {'was_impossible': False}\n",
+      "0.965787539953316\n"
      ]
     }
    ],
@@ -314,7 +314,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 16,
+   "execution_count": 9,
    "metadata": {},
    "outputs": [
     {
@@ -554,7 +554,7 @@
        "[5 rows x 23 columns]"
       ]
      },
-     "execution_count": 16,
+     "execution_count": 9,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -566,7 +566,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 17,
+   "execution_count": 10,
    "metadata": {},
    "outputs": [
     {
@@ -590,7 +590,7 @@
        "dtype: int64"
       ]
      },
-     "execution_count": 17,
+     "execution_count": 10,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -631,7 +631,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 18,
+   "execution_count": 11,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -649,7 +649,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 19,
+   "execution_count": 12,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -677,15 +677,15 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 20,
+   "execution_count": 13,
    "metadata": {},
    "outputs": [
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "18:57:45 redisvl.index.index INFO   Index already exists, overwriting.\n",
-      "18:57:45 redisvl.index.index INFO   Index already exists, overwriting.\n"
+      "12:30:31 redisvl.index.index INFO   Index already exists, overwriting.\n",
+      "12:30:31 redisvl.index.index INFO   Index already exists, overwriting.\n"
      ]
     }
    ],
@@ -709,7 +709,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 21,
+   "execution_count": 14,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -718,17 +718,17 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 22,
+   "execution_count": 15,
    "metadata": {},
    "outputs": [
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "number of movies 8351\n",
-      "size of movie df 8351\n",
-      "unique movie ids 8347\n",
-      "unique movie titles 8104\n",
+      "number of movies 8348\n",
+      "size of movie df 8348\n",
+      "unique movie ids 8342\n",
+      "unique movie titles 8109\n",
       "unique movies rated 9065\n"
      ]
     },
@@ -795,7 +795,7 @@
        "      <td>1</td>\n",
        "      <td>114709</td>\n",
        "      <td>862.0</td>\n",
-       "      <td>[0.003070617914363312, -0.2183623175004815, -0...</td>\n",
+       "      <td>[0.16217072665688012, 0.245026260806211, -0.14...</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>1</th>\n",
@@ -817,7 +817,7 @@
        "      <td>2</td>\n",
        "      <td>113497</td>\n",
        "      <td>8844.0</td>\n",
-       "      <td>[0.013404150790652358, -0.1920666231028718, -0...</td>\n",
+       "      <td>[-0.0495065883180616, 0.017243236163025016, -0...</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>2</th>\n",
@@ -839,7 +839,7 @@
        "      <td>3</td>\n",
        "      <td>113228</td>\n",
        "      <td>15602.0</td>\n",
-       "      <td>[0.17041991275371088, -0.14362645391937717, -0...</td>\n",
+       "      <td>[0.07067590986084793, 0.20963299716890343, 0.2...</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>3</th>\n",
@@ -861,7 +861,7 @@
        "      <td>4</td>\n",
        "      <td>114885</td>\n",
        "      <td>31357.0</td>\n",
-       "      <td>[0.029246177676017816, -0.19591132539475606, -...</td>\n",
+       "      <td>[-0.023481240586441465, 0.1194581665494643, -0...</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>4</th>\n",
@@ -883,7 +883,7 @@
        "      <td>5</td>\n",
        "      <td>113041</td>\n",
        "      <td>11862.0</td>\n",
-       "      <td>[-0.03755917677168938, -0.17405036529466641, 0...</td>\n",
+       "      <td>[0.07510781660794685, 0.19069717883675757, -0....</td>\n",
        "    </tr>\n",
        "  </tbody>\n",
        "</table>\n",
@@ -926,14 +926,14 @@
        "4  Father of the Bride Part II           5.7         173        5  113041   \n",
        "\n",
        "    tmdbId                                       movie_vector  \n",
-       "0    862.0  [0.003070617914363312, -0.2183623175004815, -0...  \n",
-       "1   8844.0  [0.013404150790652358, -0.1920666231028718, -0...  \n",
-       "2  15602.0  [0.17041991275371088, -0.14362645391937717, -0...  \n",
-       "3  31357.0  [0.029246177676017816, -0.19591132539475606, -...  \n",
-       "4  11862.0  [-0.03755917677168938, -0.17405036529466641, 0...  "
+       "0    862.0  [0.16217072665688012, 0.245026260806211, -0.14...  \n",
+       "1   8844.0  [-0.0495065883180616, 0.017243236163025016, -0...  \n",
+       "2  15602.0  [0.07067590986084793, 0.20963299716890343, 0.2...  \n",
+       "3  31357.0  [-0.023481240586441465, 0.1194581665494643, -0...  \n",
+       "4  11862.0  [0.07510781660794685, 0.19069717883675757, -0....  "
       ]
      },
-     "execution_count": 22,
+     "execution_count": 15,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -970,33 +970,33 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 23,
+   "execution_count": 16,
    "metadata": {},
    "outputs": [
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "{'id': 'movie:9a77231d27154ea1a678907d8e2c31ee', 'vector_distance': '-3.15922021866', 'title': 'Forrest Gump', 'genres': '[\"Comedy\",\"Drama\",\"Romance\"]'}\n",
-      "{'id': 'movie:51a38fb8f13f4726a8019a8d66f2b05b', 'vector_distance': '-3.15213918686', 'title': 'Cool Hand Luke', 'genres': '[\"Crime\",\"Drama\"]'}\n",
-      "{'id': 'movie:2e487266815945b0b8b857a848292e3e', 'vector_distance': '-3.07703495026', 'title': 'The Shawshank Redemption', 'genres': '[\"Drama\",\"Crime\"]'}\n",
-      "{'id': 'movie:c048199f45d340e282e37ff1c54089ca', 'vector_distance': '-3.04389858246', 'title': 'Lock, Stock and Two Smoking Barrels', 'genres': '[\"Comedy\",\"Crime\"]'}\n",
-      "{'id': 'movie:0996165a33924a79beb757bcefc17ed3', 'vector_distance': '-3.03677082062', 'title': 'Return of the Jedi', 'genres': '[\"Adventure\",\"Action\",\"Science Fiction\"]'}\n",
-      "{'id': 'movie:00e1ad78f47745b6a0e41b101eb98ff0', 'vector_distance': '-3.01881790161', 'title': 'In the Line of Fire', 'genres': '[\"Action\",\"Drama\",\"Thriller\",\"Crime\",\"Mystery\"]'}\n",
-      "{'id': 'movie:8fd381ca8404447caf53b63a901c69eb', 'vector_distance': '-3.01792764664', 'title': \"Mr. Holland's Opus\", 'genres': '[\"Music\",\"Drama\",\"Family\"]'}\n",
-      "{'id': 'movie:139f8da6282541a3a5881c7c457dd5ea', 'vector_distance': '-3.00384759903', 'title': 'Lifeboat', 'genres': '[\"Drama\",\"War\"]'}\n",
-      "{'id': 'movie:600e4f66decf454587888263b3fd7c6c', 'vector_distance': '-3.00213766098', 'title': 'Fargo', 'genres': '[\"Crime\",\"Drama\",\"Thriller\"]'}\n",
-      "{'id': 'movie:657859724d944786afa80a74a8ba7590', 'vector_distance': '-3.00026941299', 'title': 'Crumb', 'genres': '[\"Documentary\"]'}\n",
-      "{'id': 'movie:a8a6ce2be2e547d7be27cfcd3c4b412e', 'vector_distance': '-2.98649430275', 'title': 'Much Ado About Nothing', 'genres': '[\"Drama\",\"Comedy\",\"Romance\"]'}\n",
-      "{'id': 'movie:b77ab676009448b8b31f05cfad877d78', 'vector_distance': '-2.96709799767', 'title': 'Dead Man Walking', 'genres': '[\"Drama\"]'}\n",
-      "{'id': 'movie:203d662a71db49b7afc86e101ed3b61c', 'vector_distance': '-2.96438765526', 'title': 'Star Wars', 'genres': '[\"Adventure\",\"Action\",\"Science Fiction\"]'}\n",
-      "{'id': 'movie:4355e3c0948943db8b9c1774e3c616a2', 'vector_distance': '-2.94901204109', 'title': 'The Lord of the Rings: The Fellowship of the Ring', 'genres': '[\"Adventure\",\"Fantasy\",\"Action\"]'}\n",
-      "{'id': 'movie:81dc7920656a4d389767fd13c2cb24b3', 'vector_distance': '-2.92887830734', 'title': 'Sunset Boulevard', 'genres': '[\"Drama\"]'}\n",
-      "{'id': 'movie:42d61bf4d1fc42a498fdb2985cd402dd', 'vector_distance': '-2.92803192139', 'title': 'Cinema Paradiso', 'genres': '[\"Drama\",\"Romance\"]'}\n",
-      "{'id': 'movie:0673b1bb33a144b78397fff8939ec758', 'vector_distance': '-2.92664003372', 'title': 'Eat Drink Man Woman', 'genres': '[\"Comedy\",\"Drama\",\"Romance\"]'}\n",
-      "{'id': 'movie:8bbac427a25643d48a0582a411f2ce71', 'vector_distance': '-2.92609977722', 'title': \"Ocean's Eleven\", 'genres': '[\"Thriller\",\"Crime\"]'}\n",
-      "{'id': 'movie:ead41675cf414eca86c3e995f398b09e', 'vector_distance': '-2.92416906357', 'title': 'Three Colors: Red', 'genres': '[\"Drama\",\"Mystery\",\"Romance\"]'}\n",
-      "{'id': 'movie:9843a33a6dd44f46ad475808cebb06fe', 'vector_distance': '-2.91586065292', 'title': 'Ponyo', 'genres': '[\"Animation\",\"Family\"]'}\n"
+      "{'id': 'movie:b5b8331ab3044a35bed03e7208dd7079', 'vector_distance': '-3.64372396469', 'title': 'The Lord of the Rings: The Fellowship of the Ring', 'genres': '[\"Adventure\",\"Fantasy\",\"Action\"]'}\n",
+      "{'id': 'movie:a6e4d98e9bd34503aa0ece6924cdb4c9', 'vector_distance': '-3.59878540039', 'title': 'The Dark Knight', 'genres': '[\"Drama\",\"Action\",\"Crime\",\"Thriller\"]'}\n",
+      "{'id': 'movie:51e5e1fbd6d940d894122e98fc638e85', 'vector_distance': '-3.59825658798', 'title': '12 Angry Men', 'genres': '[\"Drama\"]'}\n",
+      "{'id': 'movie:876f2cc13807471ba1dbaedbd92303c8', 'vector_distance': '-3.59230089188', 'title': 'Leon: The Professional', 'genres': '[\"Thriller\",\"Crime\",\"Drama\"]'}\n",
+      "{'id': 'movie:05a21bd00b8b4ed6938fc52cb532a601', 'vector_distance': '-3.54890108109', 'title': 'The Matrix', 'genres': '[\"Action\",\"Science Fiction\"]'}\n",
+      "{'id': 'movie:4644b98a4a21435981928b2d2871089d', 'vector_distance': '-3.4690322876', 'title': 'Band of Brothers', 'genres': '[\"Action\",\"Drama\",\"War\"]'}\n",
+      "{'id': 'movie:b096f81674a24f00a24cd9289d777913', 'vector_distance': '-3.46432924271', 'title': 'Memento', 'genres': '[\"Mystery\",\"Thriller\"]'}\n",
+      "{'id': 'movie:6861752bd166469aa89ab1ca69950ee8', 'vector_distance': '-3.4593539238', 'title': 'The Princess Bride', 'genres': '[\"Adventure\",\"Family\",\"Fantasy\",\"Comedy\",\"Romance\"]'}\n",
+      "{'id': 'movie:44911da20d8e4ad3a93a8452a3438feb', 'vector_distance': '-3.44543361664', 'title': 'American History X', 'genres': '[\"Drama\"]'}\n",
+      "{'id': 'movie:6f04f57431894700b9d23b60ebf8fac0', 'vector_distance': '-3.44274091721', 'title': 'Interstellar', 'genres': '[\"Adventure\",\"Drama\",\"Science Fiction\"]'}\n",
+      "{'id': 'movie:b2f480a4c97d404d8a5caa248b59a0e3', 'vector_distance': '-3.43494272232', 'title': 'Cinema Paradiso', 'genres': '[\"Drama\",\"Romance\"]'}\n",
+      "{'id': 'movie:367a0ad625564683b8818edc82b3878b', 'vector_distance': '-3.42747116089', 'title': 'The Prestige', 'genres': '[\"Drama\",\"Mystery\",\"Thriller\"]'}\n",
+      "{'id': 'movie:ec5ccd6e24fa470eae864f1ed3f7c566', 'vector_distance': '-3.42468452454', 'title': 'The African Queen', 'genres': '[\"Adventure\",\"War\",\"Romance\"]'}\n",
+      "{'id': 'movie:415b17e8f1a64ec0b7819068ae0ebc2d', 'vector_distance': '-3.4210562706', 'title': 'Happiness', 'genres': '[\"Comedy\",\"Drama\"]'}\n",
+      "{'id': 'movie:81802cdb06684b7fb3c26e754cb0bc50', 'vector_distance': '-3.41307687759', 'title': 'Forrest Gump', 'genres': '[\"Comedy\",\"Drama\",\"Romance\"]'}\n",
+      "{'id': 'movie:ce087387efd84691bab6a4228289ca47', 'vector_distance': '-3.40901231766', 'title': 'Thank You for Smoking', 'genres': '[\"Comedy\",\"Drama\"]'}\n",
+      "{'id': 'movie:846793c4d43e4f3a9c7536682e169789', 'vector_distance': '-3.38840723038', 'title': 'A Close Shave', 'genres': '[\"Family\",\"Animation\",\"Comedy\"]'}\n",
+      "{'id': 'movie:8e370a42277a43859a19cef8223549b2', 'vector_distance': '-3.38426446915', 'title': 'Up', 'genres': '[\"Animation\",\"Comedy\",\"Family\",\"Adventure\"]'}\n",
+      "{'id': 'movie:01c1de9dd23046f1ba630daf295b91a1', 'vector_distance': '-3.36946439743', 'title': 'Sin City', 'genres': '[\"Action\",\"Thriller\",\"Crime\"]'}\n",
+      "{'id': 'movie:a60d16b71e874eb99fad5461dc48b034', 'vector_distance': '-3.363966465', 'title': 'The Departed', 'genres': '[\"Drama\",\"Thriller\",\"Crime\"]'}\n"
      ]
     }
    ],
@@ -1032,7 +1032,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 24,
+   "execution_count": 17,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -1071,7 +1071,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 25,
+   "execution_count": 18,
    "metadata": {},
    "outputs": [
     {
@@ -1106,92 +1106,92 @@
        "  <tbody>\n",
        "    <tr>\n",
        "      <th>0</th>\n",
-       "      <td>The Graduate</td>\n",
-       "      <td>The Graduate</td>\n",
-       "      <td>The Graduate</td>\n",
-       "      <td>Pulp Fiction</td>\n",
-       "      <td>All About Eve</td>\n",
-       "      <td>The Grapes of Wrath</td>\n",
+       "      <td>The Professional</td>\n",
+       "      <td>One Flew Over the Cuckoo's Nest</td>\n",
+       "      <td>The Professional</td>\n",
+       "      <td>The Dark Knight</td>\n",
+       "      <td>The Professional</td>\n",
+       "      <td>A Clockwork Orange</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>1</th>\n",
-       "      <td>Das Boot</td>\n",
-       "      <td>Das Boot</td>\n",
-       "      <td>Das Boot</td>\n",
+       "      <td>One Flew Over the Cuckoo's Nest</td>\n",
+       "      <td>The Godfather</td>\n",
+       "      <td>One Flew Over the Cuckoo's Nest</td>\n",
        "      <td>The Shawshank Redemption</td>\n",
-       "      <td>The Postman</td>\n",
-       "      <td>What's Eating Gilbert Grape</td>\n",
+       "      <td>Seven Samurai</td>\n",
+       "      <td>Pineapple Express</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>2</th>\n",
-       "      <td>Amadeus</td>\n",
-       "      <td>Amadeus</td>\n",
-       "      <td>Amadeus</td>\n",
-       "      <td>Gone Girl</td>\n",
-       "      <td>Bicycle Thieves</td>\n",
-       "      <td>A Clockwork Orange</td>\n",
+       "      <td>The Godfather</td>\n",
+       "      <td>Leon: The Professional</td>\n",
+       "      <td>The Godfather</td>\n",
+       "      <td>Pulp Fiction</td>\n",
+       "      <td>Akira</td>\n",
+       "      <td>James and the Giant Peach</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>3</th>\n",
-       "      <td>Fargo</td>\n",
-       "      <td>Fargo</td>\n",
-       "      <td>Dr. Strangelove or: How I Learned to Stop Worr...</td>\n",
-       "      <td>Dawn of the Planet of the Apes</td>\n",
-       "      <td>My Neighbor Totoro</td>\n",
-       "      <td>Bananas</td>\n",
+       "      <td>Leon: The Professional</td>\n",
+       "      <td>The Godfather: Part II</td>\n",
+       "      <td>The Godfather: Part II</td>\n",
+       "      <td>Fight Club</td>\n",
+       "      <td>The Postman</td>\n",
+       "      <td>The Grapes of Wrath</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>4</th>\n",
-       "      <td>Dr. Strangelove or: How I Learned to Stop Worr...</td>\n",
-       "      <td>Shakespeare in Love</td>\n",
-       "      <td>Cinema Paradiso</td>\n",
-       "      <td>Fight Club</td>\n",
-       "      <td>The Wild Bunch</td>\n",
-       "      <td>Pineapple Express</td>\n",
+       "      <td>The Godfather: Part II</td>\n",
+       "      <td>The Dark Knight</td>\n",
+       "      <td>A Clockwork Orange</td>\n",
+       "      <td>Big Hero 6</td>\n",
+       "      <td>Cube</td>\n",
+       "      <td>Bananas</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>5</th>\n",
-       "      <td>Cinema Paradiso</td>\n",
-       "      <td>The Last Emperor</td>\n",
-       "      <td>Take the Money and Run</td>\n",
+       "      <td>A Clockwork Orange</td>\n",
+       "      <td>Fargo</td>\n",
+       "      <td>The African Queen</td>\n",
        "      <td>Blade Runner</td>\n",
-       "      <td>M</td>\n",
-       "      <td>James and the Giant Peach</td>\n",
+       "      <td>Castle in the Sky</td>\n",
+       "      <td>What's Eating Gilbert Grape</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>6</th>\n",
-       "      <td>Take the Money and Run</td>\n",
-       "      <td>The Color Purple</td>\n",
-       "      <td>The Last Emperor</td>\n",
-       "      <td>Whiplash</td>\n",
-       "      <td>Rebel Without a Cause</td>\n",
+       "      <td>Let the Right One In</td>\n",
+       "      <td>The Graduate</td>\n",
+       "      <td>Cool Hand Luke</td>\n",
+       "      <td>Gone Girl</td>\n",
+       "      <td>M</td>\n",
        "      <td>The Apple Dumpling Gang</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>7</th>\n",
-       "      <td>Shakespeare in Love</td>\n",
-       "      <td>Manhattan</td>\n",
-       "      <td>Raging Bull</td>\n",
-       "      <td>Big Hero 6</td>\n",
-       "      <td>Withnail &amp; I</td>\n",
-       "      <td>Adam's Apples</td>\n",
+       "      <td>The African Queen</td>\n",
+       "      <td>Eternal Sunshine of the Spotless Mind</td>\n",
+       "      <td>The Graduate</td>\n",
+       "      <td>Whiplash</td>\n",
+       "      <td>Once Upon a Time in America</td>\n",
+       "      <td>Soldier of Orange</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>8</th>\n",
-       "      <td>The Last Emperor</td>\n",
-       "      <td>Annie Hall</td>\n",
-       "      <td>The Color Purple</td>\n",
-       "      <td>Guardians of the Galaxy</td>\n",
-       "      <td>Meet John Doe</td>\n",
+       "      <td>Cool Hand Luke</td>\n",
+       "      <td>Inception</td>\n",
+       "      <td>12 Angry Men</td>\n",
+       "      <td>The Avengers</td>\n",
+       "      <td>All Quiet on the Western Front</td>\n",
        "      <td>Orange County</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>9</th>\n",
-       "      <td>Raging Bull</td>\n",
-       "      <td>The Piano</td>\n",
-       "      <td>North by Northwest</td>\n",
-       "      <td>Captain America: Civil War</td>\n",
-       "      <td>Once Upon a Time in America</td>\n",
+       "      <td>The Dark Knight</td>\n",
+       "      <td>Boyz n the Hood</td>\n",
+       "      <td>The Treasure of the Sierra Madre</td>\n",
+       "      <td>Guardians of the Galaxy</td>\n",
+       "      <td>Aguirre: The Wrath of God</td>\n",
        "      <td>Herbie Goes Bananas</td>\n",
        "    </tr>\n",
        "  </tbody>\n",
@@ -1199,56 +1199,44 @@
        "</div>"
       ],
       "text/plain": [
-       "                                           top picks        block busters  \\\n",
-       "0                                       The Graduate         The Graduate   \n",
-       "1                                           Das Boot             Das Boot   \n",
-       "2                                            Amadeus              Amadeus   \n",
-       "3                                              Fargo                Fargo   \n",
-       "4  Dr. Strangelove or: How I Learned to Stop Worr...  Shakespeare in Love   \n",
-       "5                                    Cinema Paradiso     The Last Emperor   \n",
-       "6                             Take the Money and Run     The Color Purple   \n",
-       "7                                Shakespeare in Love            Manhattan   \n",
-       "8                                   The Last Emperor           Annie Hall   \n",
-       "9                                        Raging Bull            The Piano   \n",
-       "\n",
-       "                                            classics  \\\n",
-       "0                                       The Graduate   \n",
-       "1                                           Das Boot   \n",
-       "2                                            Amadeus   \n",
-       "3  Dr. Strangelove or: How I Learned to Stop Worr...   \n",
-       "4                                    Cinema Paradiso   \n",
-       "5                             Take the Money and Run   \n",
-       "6                                   The Last Emperor   \n",
-       "7                                        Raging Bull   \n",
-       "8                                   The Color Purple   \n",
-       "9                                 North by Northwest   \n",
+       "                         top picks                          block busters  \\\n",
+       "0                 The Professional        One Flew Over the Cuckoo's Nest   \n",
+       "1  One Flew Over the Cuckoo's Nest                          The Godfather   \n",
+       "2                    The Godfather                 Leon: The Professional   \n",
+       "3           Leon: The Professional                 The Godfather: Part II   \n",
+       "4           The Godfather: Part II                        The Dark Knight   \n",
+       "5               A Clockwork Orange                                  Fargo   \n",
+       "6             Let the Right One In                           The Graduate   \n",
+       "7                The African Queen  Eternal Sunshine of the Spotless Mind   \n",
+       "8                   Cool Hand Luke                              Inception   \n",
+       "9                  The Dark Knight                        Boyz n the Hood   \n",
        "\n",
-       "                   what's popular                   indie hits  \\\n",
-       "0                    Pulp Fiction                All About Eve   \n",
-       "1        The Shawshank Redemption                  The Postman   \n",
-       "2                       Gone Girl              Bicycle Thieves   \n",
-       "3  Dawn of the Planet of the Apes           My Neighbor Totoro   \n",
-       "4                      Fight Club               The Wild Bunch   \n",
-       "5                    Blade Runner                            M   \n",
-       "6                        Whiplash        Rebel Without a Cause   \n",
-       "7                      Big Hero 6                 Withnail & I   \n",
-       "8         Guardians of the Galaxy                Meet John Doe   \n",
-       "9      Captain America: Civil War  Once Upon a Time in America   \n",
+       "                           classics            what's popular  \\\n",
+       "0                  The Professional           The Dark Knight   \n",
+       "1   One Flew Over the Cuckoo's Nest  The Shawshank Redemption   \n",
+       "2                     The Godfather              Pulp Fiction   \n",
+       "3            The Godfather: Part II                Fight Club   \n",
+       "4                A Clockwork Orange                Big Hero 6   \n",
+       "5                 The African Queen              Blade Runner   \n",
+       "6                    Cool Hand Luke                 Gone Girl   \n",
+       "7                      The Graduate                  Whiplash   \n",
+       "8                      12 Angry Men              The Avengers   \n",
+       "9  The Treasure of the Sierra Madre   Guardians of the Galaxy   \n",
        "\n",
-       "                  fruity films  \n",
-       "0          The Grapes of Wrath  \n",
-       "1  What's Eating Gilbert Grape  \n",
-       "2           A Clockwork Orange  \n",
-       "3                      Bananas  \n",
-       "4            Pineapple Express  \n",
-       "5    James and the Giant Peach  \n",
-       "6      The Apple Dumpling Gang  \n",
-       "7                Adam's Apples  \n",
-       "8                Orange County  \n",
-       "9          Herbie Goes Bananas  "
+       "                       indie hits                 fruity films  \n",
+       "0                The Professional           A Clockwork Orange  \n",
+       "1                   Seven Samurai            Pineapple Express  \n",
+       "2                           Akira    James and the Giant Peach  \n",
+       "3                     The Postman          The Grapes of Wrath  \n",
+       "4                            Cube                      Bananas  \n",
+       "5               Castle in the Sky  What's Eating Gilbert Grape  \n",
+       "6                               M      The Apple Dumpling Gang  \n",
+       "7     Once Upon a Time in America            Soldier of Orange  \n",
+       "8  All Quiet on the Western Front                Orange County  \n",
+       "9       Aguirre: The Wrath of God          Herbie Goes Bananas  "
       ]
      },
-     "execution_count": 25,
+     "execution_count": 18,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -1271,95 +1259,250 @@
    "metadata": {},
    "source": [
     "## Keeping Things Fresh\n",
-    "You've probably noticed that a few movies get repeated in these lists. That's not surprising as all our results are personalized and things like `popularity` and `user_rating` and `revenue` are likely highly correlated. And it's more that likely that at least some of the recommendations we're expecting to be highly rated by a given user is one they've already watched and rated highly.\n",
+    "You've probably noticed that a few movies get repeated in these lists. That's not surprising as all our results are personalized and things like `popularity` and `user_rating` and `revenue` are likely highly correlated. And it's more than likely that at least some of the recommendations we're expecting to be highly rated by a given user are ones they've already watched and rated highly.\n",
     "\n",
-    "Luckily Redis offers an easy anwer to keeping recommendations new and interesting, and that answer is Bloom Filters."
+    "Luckily Redis offers an easy answer to keeping recommendations new and interesting, and that answer is Bloom Filters."
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 34,
+   "execution_count": 19,
    "metadata": {},
    "outputs": [
     {
-     "ename": "AttributeError",
-     "evalue": "'bool' object has no attribute 'add'",
-     "output_type": "error",
-     "traceback": [
-      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
-      "\u001b[0;31mAttributeError\u001b[0m                            Traceback (most recent call last)",
-      "Cell \u001b[0;32mIn[34], line 40\u001b[0m\n\u001b[1;32m     38\u001b[0m \u001b[38;5;28mfilter\u001b[39m \u001b[38;5;241m=\u001b[39m client\u001b[38;5;241m.\u001b[39mbf()\u001b[38;5;241m.\u001b[39mcreate(\u001b[38;5;124m'\u001b[39m\u001b[38;5;124muser_watched_list:\u001b[39m\u001b[38;5;132;01m{user_id}\u001b[39;00m\u001b[38;5;124m'\u001b[39m, \u001b[38;5;241m0.01\u001b[39m, \u001b[38;5;241m1000\u001b[39m)\n\u001b[1;32m     39\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m movie_id \u001b[38;5;129;01min\u001b[39;00m watched_movies:\n\u001b[0;32m---> 40\u001b[0m     \u001b[38;5;28;43mfilter\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43madd\u001b[49m(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;132;01m{\u001b[39;00muser_id\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m:\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mmovie_id\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m'\u001b[39m)\n\u001b[1;32m     42\u001b[0m Top_picks_for_you \u001b[38;5;241m=\u001b[39m get_unique_recommendations(user_id\u001b[38;5;241m=\u001b[39muser_id)  \u001b[38;5;66;03m# general SVD results, no filter\u001b[39;00m\n\u001b[1;32m     43\u001b[0m block_buster_hits \u001b[38;5;241m=\u001b[39m get_unique_recommendations(user_id\u001b[38;5;241m=\u001b[39muser_id, filters\u001b[38;5;241m=\u001b[39mblock_buster_filter)\n",
-      "\u001b[0;31mAttributeError\u001b[0m: 'bool' object has no attribute 'add'"
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "BFCommands.exists() missing 1 required positional argument: 'item'\n"
      ]
     }
    ],
    "source": [
-    "\n",
-    "# create a bloom filter for a given user and add their watched list to it\n",
-    "def create_bloom_filter(user_id, watched_movies):\n",
-    "    if not client.bf().exists(f\"user_watched_list\"):\n",
-    "        filter = client.bf().create(f\"user_watched_list\", 0.01, 1000)\n",
-    "    for movie_id in watched_movies:\n",
-    "        client.bf().add(f\"user_watched_list\", f\"{user_id}:{movie_id}\")\n",
-    "    return filter\n",
-    "\n",
     "# rewrite the get_recommendations() function to use a bloom filter and apply it before we return results\n",
     "def get_unique_recommendations(user_id, filters=None, num_results=10):\n",
     "    user_vector = user_vectors[user_id].tolist()\n",
-    "    bloom_filter_name = f\"user:{user_id}:watched\"\n",
+    "    watched_movies = ratings_df[ratings_df['userId'] == user_id]['movieId'].tolist()\n",
+    "\n",
+    "    client.bf().insert('user_watched_list', [f\"{user_id}:{movie_id}\" for movie_id in watched_movies])\n",
     "\n",
     "    query = RangeQuery(vector=user_vector,\n",
     "                       vector_field_name='movie_vector',\n",
     "                       num_results=num_results * 2,  # fetch more results to filter out watched movies\n",
     "                       filter_expression=filters,\n",
-    "                       #return_fields=['title', 'overview', 'genres', 'movie_id'])\n",
-    "                       return_fields=['title',  'movieId'])\n",
-    "\n",
+    "                       return_fields=['title', '$.movie_id', '$.movieId', 'imdb_id', 'imdbId','overview', 'genres'],\n",
+    "    ) # TODO figure out why i need to add '$.' to some fields, but not others\n",
     "    results = movie_index.query(query)\n",
     "\n",
     "    # filter out movies that the user has already watched\n",
     "    recommendations = []\n",
     "    for r in results:\n",
-    "        print(r)\n",
-    "        if not bloom_client.bfExists(bloom_filter_name, r['movieId']):\n",
+    "        if not client.bf().exists('user_watched_list', f\"{user_id}:{r['$.movieId']}\"):  # must use the same '<user_id>:<movie_id>' key format that insert() stored above\n",
     "            recommendations.append((r['title'], r['overview'], r['genres'], r['vector_distance']))\n",
     "        if len(recommendations) >= num_results:\n",
     "            break\n",
-    "\n",
     "    return recommendations\n",
     "\n",
     "# example usage\n",
-    "user_id = 42\n",
-    "watched_movies = ratings_df[ratings_df['userId'] == user_id]['movieId'].tolist()\n",
+    "# create a bloom filter for this user\n",
+    "try:\n",
+    "    client.bf().create('user_watched_list', 0.01, 1000)  # exists() requires an item argument; create() raises if the filter is already there\n",
+    "except Exception as e:\n",
+    "    print(e)\n",
+    "    pass\n",
     "\n",
-    "filter = client.bf().create('user_watched_list:{user_id}', 0.01, 1000)\n",
-    "for movie_id in watched_movies:\n",
-    "    filter.add(f'{user_id}:{movie_id}')\n",
+    "user_id = 42\n",
     "\n",
-    "Top_picks_for_you = get_unique_recommendations(user_id=user_id)  # general SVD results, no filter\n",
+    "top_picks_for_you = get_unique_recommendations(user_id=user_id)  # general SVD results, no filter\n",
     "block_buster_hits = get_unique_recommendations(user_id=user_id, filters=block_buster_filter)\n",
     "classics = get_unique_recommendations(user_id=user_id, filters=classics_filter)\n",
-    "Whats_popular = get_unique_recommendations(user_id=user_id, filters=popular_filter)\n",
+    "whats_popular = get_unique_recommendations(user_id=user_id, filters=popular_filter)\n",
     "indie_hits = get_unique_recommendations(user_id=user_id, filters=indie_filter)\n",
     "fruity_films = get_unique_recommendations(user_id=user_id, filters=fruity)"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 20,
    "metadata": {
     "vscode": {
      "languageId": "ruby"
     }
    },
-   "outputs": [],
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>top picks</th>\n",
+       "      <th>block busters</th>\n",
+       "      <th>classics</th>\n",
+       "      <th>what's popular</th>\n",
+       "      <th>indie hits</th>\n",
+       "      <th>fruity films</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>The Professional</td>\n",
+       "      <td>One Flew Over the Cuckoo's Nest</td>\n",
+       "      <td>The Professional</td>\n",
+       "      <td>The Dark Knight</td>\n",
+       "      <td>The Professional</td>\n",
+       "      <td>A Clockwork Orange</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>One Flew Over the Cuckoo's Nest</td>\n",
+       "      <td>The Godfather</td>\n",
+       "      <td>One Flew Over the Cuckoo's Nest</td>\n",
+       "      <td>The Shawshank Redemption</td>\n",
+       "      <td>Seven Samurai</td>\n",
+       "      <td>Pineapple Express</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>The Godfather</td>\n",
+       "      <td>Leon: The Professional</td>\n",
+       "      <td>The Godfather</td>\n",
+       "      <td>Pulp Fiction</td>\n",
+       "      <td>Akira</td>\n",
+       "      <td>James and the Giant Peach</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>Leon: The Professional</td>\n",
+       "      <td>The Godfather: Part II</td>\n",
+       "      <td>The Godfather: Part II</td>\n",
+       "      <td>Fight Club</td>\n",
+       "      <td>The Postman</td>\n",
+       "      <td>The Grapes of Wrath</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4</th>\n",
+       "      <td>The Godfather: Part II</td>\n",
+       "      <td>The Dark Knight</td>\n",
+       "      <td>A Clockwork Orange</td>\n",
+       "      <td>Big Hero 6</td>\n",
+       "      <td>Cube</td>\n",
+       "      <td>Bananas</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>5</th>\n",
+       "      <td>A Clockwork Orange</td>\n",
+       "      <td>Fargo</td>\n",
+       "      <td>The African Queen</td>\n",
+       "      <td>Blade Runner</td>\n",
+       "      <td>Castle in the Sky</td>\n",
+       "      <td>What's Eating Gilbert Grape</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>6</th>\n",
+       "      <td>Let the Right One In</td>\n",
+       "      <td>The Graduate</td>\n",
+       "      <td>Cool Hand Luke</td>\n",
+       "      <td>Gone Girl</td>\n",
+       "      <td>M</td>\n",
+       "      <td>The Apple Dumpling Gang</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>7</th>\n",
+       "      <td>The African Queen</td>\n",
+       "      <td>Eternal Sunshine of the Spotless Mind</td>\n",
+       "      <td>The Graduate</td>\n",
+       "      <td>Whiplash</td>\n",
+       "      <td>Once Upon a Time in America</td>\n",
+       "      <td>Soldier of Orange</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>8</th>\n",
+       "      <td>Cool Hand Luke</td>\n",
+       "      <td>Inception</td>\n",
+       "      <td>12 Angry Men</td>\n",
+       "      <td>The Avengers</td>\n",
+       "      <td>All Quiet on the Western Front</td>\n",
+       "      <td>Orange County</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>9</th>\n",
+       "      <td>The Dark Knight</td>\n",
+       "      <td>Boyz n the Hood</td>\n",
+       "      <td>The Treasure of the Sierra Madre</td>\n",
+       "      <td>Guardians of the Galaxy</td>\n",
+       "      <td>Aguirre: The Wrath of God</td>\n",
+       "      <td>Herbie Goes Bananas</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "                         top picks                          block busters  \\\n",
+       "0                 The Professional        One Flew Over the Cuckoo's Nest   \n",
+       "1  One Flew Over the Cuckoo's Nest                          The Godfather   \n",
+       "2                    The Godfather                 Leon: The Professional   \n",
+       "3           Leon: The Professional                 The Godfather: Part II   \n",
+       "4           The Godfather: Part II                        The Dark Knight   \n",
+       "5               A Clockwork Orange                                  Fargo   \n",
+       "6             Let the Right One In                           The Graduate   \n",
+       "7                The African Queen  Eternal Sunshine of the Spotless Mind   \n",
+       "8                   Cool Hand Luke                              Inception   \n",
+       "9                  The Dark Knight                        Boyz n the Hood   \n",
+       "\n",
+       "                           classics            what's popular  \\\n",
+       "0                  The Professional           The Dark Knight   \n",
+       "1   One Flew Over the Cuckoo's Nest  The Shawshank Redemption   \n",
+       "2                     The Godfather              Pulp Fiction   \n",
+       "3            The Godfather: Part II                Fight Club   \n",
+       "4                A Clockwork Orange                Big Hero 6   \n",
+       "5                 The African Queen              Blade Runner   \n",
+       "6                    Cool Hand Luke                 Gone Girl   \n",
+       "7                      The Graduate                  Whiplash   \n",
+       "8                      12 Angry Men              The Avengers   \n",
+       "9  The Treasure of the Sierra Madre   Guardians of the Galaxy   \n",
+       "\n",
+       "                       indie hits                 fruity films  \n",
+       "0                The Professional           A Clockwork Orange  \n",
+       "1                   Seven Samurai            Pineapple Express  \n",
+       "2                           Akira    James and the Giant Peach  \n",
+       "3                     The Postman          The Grapes of Wrath  \n",
+       "4                            Cube                      Bananas  \n",
+       "5               Castle in the Sky  What's Eating Gilbert Grape  \n",
+       "6                               M      The Apple Dumpling Gang  \n",
+       "7     Once Upon a Time in America            Soldier of Orange  \n",
+       "8  All Quiet on the Western Front                Orange County  \n",
+       "9       Aguirre: The Wrath of God          Herbie Goes Bananas  "
+      ]
+     },
+     "execution_count": 20,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
    "source": [
     "# put all these titles into a single pandas dataframe , where each column is one category\n",
     "all_recommendations = pd.DataFrame(columns=[\"top picks\", \"block busters\", \"classics\", \"what's popular\", \"indie hits\", \"fruity films\"])\n",
-    "all_recommendations[\"top picks\"] = [m[0] for m in Top_picks_for_you]\n",
+    "all_recommendations[\"top picks\"] = [m[0] for m in top_picks_for_you]\n",
     "all_recommendations[\"block busters\"] = [m[0] for m in block_buster_hits]\n",
     "all_recommendations[\"classics\"] = [m[0] for m in classics]\n",
-    "all_recommendations[\"what's popular\"] = [m[0] for m in Whats_popular]\n",
+    "all_recommendations[\"what's popular\"] = [m[0] for m in whats_popular]\n",
     "all_recommendations[\"indie hits\"] = [m[0] for m in indie_hits]\n",
     "all_recommendations[\"fruity films\"] = [m[0] for m in fruity_films]\n",
     "\n",
@@ -1376,14 +1519,14 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 27,
+   "execution_count": 21,
    "metadata": {},
    "outputs": [
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Deleted 4351 keys\n",
+      "Deleted 4348 keys\n",
       "Deleted 2000 keys\n",
       "Deleted 1000 keys\n",
       "Deleted 500 keys\n",
@@ -1396,7 +1539,7 @@
        "1"
       ]
      },
-     "execution_count": 27,
+     "execution_count": 21,
      "metadata": {},
      "output_type": "execute_result"
     }

From a36f4bbb185d25288e2051daf987c19c167a823c Mon Sep 17 00:00:00 2001
From: Justin Cechmanek <justin.cechmanek@redis.com>
Date: Mon, 14 Oct 2024 15:11:39 -0700
Subject: [PATCH 06/12] cleans up schema and notebook cells

---
 .../collaborative_filtering.ipynb             | 714 ++++++++++--------
 .../collaborative_filtering_schema.yaml       |   4 +-
 .../recommendation-systems/user_schema.yaml   |  20 -
 3 files changed, 398 insertions(+), 340 deletions(-)
 delete mode 100644 python-recipes/recommendation-systems/user_schema.yaml

diff --git a/python-recipes/recommendation-systems/collaborative_filtering.ipynb b/python-recipes/recommendation-systems/collaborative_filtering.ipynb
index 99044c02..afa93fe4 100644
--- a/python-recipes/recommendation-systems/collaborative_filtering.ipynb
+++ b/python-recipes/recommendation-systems/collaborative_filtering.ipynb
@@ -93,9 +93,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "ratings_file = 'ratings_small.csv'\n",
-    "\n",
-    "ratings_df = fetch_dataframe(ratings_file)\n",
+    "ratings_df = fetch_dataframe('ratings_small.csv') # for a larger example use 'ratings.csv' instead\n",
     "\n",
     "# only keep the columns we need: userId, movieId, rating\n",
     "ratings_df = ratings_df[['userId', 'movieId', 'rating']]\n",
@@ -164,9 +162,9 @@
     "\n",
     "| movie_1_feature_1 | movie_2_feature_1 | movie_3_feature_1 | ... | movie_M_feature_1 |\n",
     "| --- | --- | --- | --- | --- |\n",
-    "| movie_1_feature_2 | movie_2_feature_2 | movie_3_feature_2 | ... | movie_M_feature_1 |\n",
-    "| movie_1_feature_3 | movie_2_feature_3 | movie_3_feature_3 | ... | movie_M_feature_1 |\n",
-    "| movie_1_feature_4 | movie_2_feature_4 | movie_3_feature_4 | ... | movie_M_feature_1 |\n",
+    "| movie_1_feature_2 | movie_2_feature_2 | movie_3_feature_2 | ... | movie_M_feature_2 |\n",
+    "| movie_1_feature_3 | movie_2_feature_3 | movie_3_feature_3 | ... | movie_M_feature_3 |\n",
+    "| movie_1_feature_4 | movie_2_feature_4 | movie_3_feature_4 | ... | movie_M_feature_4 |\n",
     "|  ...  | . | . | ... | . |\n",
     "| movie_1_feature_k | movie_2_feature_k | movie_3_feature_k | ... | movie_M_feature_k |\n",
     "\n",
@@ -182,7 +180,7 @@
     {
      "data": {
       "text/plain": [
-       "<surprise.prediction_algorithms.matrix_factorization.SVD at 0x108501ed0>"
+       "<surprise.prediction_algorithms.matrix_factorization.SVD at 0x1067e7d10>"
       ]
      },
      "execution_count": 5,
@@ -195,7 +193,7 @@
     "train_set, test_set = train_test_split(ratings_data, test_size=0.2)\n",
     "\n",
     "# use SVD (Singular Value Decomposition) for collaborative filtering\n",
-    "svd = SVD(n_factors=100, biased=False)  # We'll set biased to False so that predictions are of the form \"rating_prediction = user_vector dot item_vector\"\n",
+    "svd = SVD(n_factors=100, biased=False)  # we'll set biased to False so that predictions are of the form \"rating_prediction = user_vector dot item_vector\"\n",
     "\n",
     "# train the algorithm on the train_set\n",
     "svd.fit(train_set)"
@@ -232,7 +230,7 @@
      "output_type": "stream",
      "text": [
       "we have 671 users with feature vectors of size 100\n",
-      "we have 8393 movies with feature vectors of size 100\n"
+      "we have 8376 movies with feature vectors of size 100\n"
      ]
     }
    ],
@@ -265,7 +263,7 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "the predicted rating of user 347 on movie 5515 is 0.965787539953316\n"
+      "the predicted rating of user 347 on movie 5515 is 1.2554222750662518\n"
      ]
     }
    ],
@@ -279,29 +277,6 @@
     "print(f'the predicted rating of user {347} on movie {5515} is {predicted_rating}')"
    ]
   },
-  {
-   "cell_type": "code",
-   "execution_count": 8,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "user: 347        item: 5515       r_ui = None   est = 0.97   {'was_impossible': False}\n",
-      "0.965787539953316\n"
-     ]
-    }
-   ],
-   "source": [
-    "# sanity check my math matches Surprise package math\n",
-    "print(svd.predict(347, 5515))\n",
-    "\n",
-    "inner_uid = train_set.to_inner_uid(347)\n",
-    "inner_iid = train_set.to_inner_iid(5515)\n",
-    "print(np.dot(user_vectors[inner_uid], movie_vectors[inner_iid])) # surprise casts userId and movieId to inner ids"
-   ]
-  },
   {
    "cell_type": "markdown",
    "metadata": {},
@@ -635,8 +610,8 @@
    "metadata": {},
    "outputs": [],
    "source": [
+    "links_df = fetch_dataframe('links_small.csv') # for a larger example use 'links.csv' instead\n",
     "\n",
-    "links_df = fetch_dataframe('links_small.csv')\n",
     "movies_df = movies_df.merge(links_df, left_on='imdb_id', right_on='imdbId', how='inner')"
    ]
   },
@@ -651,7 +626,213 @@
    "cell_type": "code",
    "execution_count": 12,
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>budget</th>\n",
+       "      <th>genres</th>\n",
+       "      <th>id</th>\n",
+       "      <th>imdb_id</th>\n",
+       "      <th>original_language</th>\n",
+       "      <th>overview</th>\n",
+       "      <th>popularity</th>\n",
+       "      <th>release_date</th>\n",
+       "      <th>revenue</th>\n",
+       "      <th>runtime</th>\n",
+       "      <th>status</th>\n",
+       "      <th>tagline</th>\n",
+       "      <th>title</th>\n",
+       "      <th>vote_average</th>\n",
+       "      <th>vote_count</th>\n",
+       "      <th>movieId</th>\n",
+       "      <th>imdbId</th>\n",
+       "      <th>tmdbId</th>\n",
+       "      <th>movie_vector</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>30000000</td>\n",
+       "      <td>[Animation, Comedy, Family]</td>\n",
+       "      <td>862</td>\n",
+       "      <td>114709</td>\n",
+       "      <td>en</td>\n",
+       "      <td>Led by Woody, Andy's toys live happily in his ...</td>\n",
+       "      <td>21.946943</td>\n",
+       "      <td>815040000.0</td>\n",
+       "      <td>373554033</td>\n",
+       "      <td>81.0</td>\n",
+       "      <td>Released</td>\n",
+       "      <td></td>\n",
+       "      <td>Toy Story</td>\n",
+       "      <td>7.7</td>\n",
+       "      <td>5415</td>\n",
+       "      <td>1</td>\n",
+       "      <td>114709</td>\n",
+       "      <td>862.0</td>\n",
+       "      <td>[-0.09139158006123944, 0.3113782797006747, -0....</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>65000000</td>\n",
+       "      <td>[Adventure, Fantasy, Family]</td>\n",
+       "      <td>8844</td>\n",
+       "      <td>113497</td>\n",
+       "      <td>en</td>\n",
+       "      <td>When siblings Judy and Peter discover an encha...</td>\n",
+       "      <td>17.015539</td>\n",
+       "      <td>819014400.0</td>\n",
+       "      <td>262797249</td>\n",
+       "      <td>104.0</td>\n",
+       "      <td>Released</td>\n",
+       "      <td>Roll the dice and unleash the excitement!</td>\n",
+       "      <td>Jumanji</td>\n",
+       "      <td>6.9</td>\n",
+       "      <td>2413</td>\n",
+       "      <td>2</td>\n",
+       "      <td>113497</td>\n",
+       "      <td>8844.0</td>\n",
+       "      <td>[-0.5145776514053282, 0.18805717045856102, 0.0...</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>0</td>\n",
+       "      <td>[Romance, Comedy]</td>\n",
+       "      <td>15602</td>\n",
+       "      <td>113228</td>\n",
+       "      <td>en</td>\n",
+       "      <td>A family wedding reignites the ancient feud be...</td>\n",
+       "      <td>11.712900</td>\n",
+       "      <td>819619200.0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>101.0</td>\n",
+       "      <td>Released</td>\n",
+       "      <td>Still Yelling. Still Fighting. Still Ready for...</td>\n",
+       "      <td>Grumpier Old Men</td>\n",
+       "      <td>6.5</td>\n",
+       "      <td>92</td>\n",
+       "      <td>3</td>\n",
+       "      <td>113228</td>\n",
+       "      <td>15602.0</td>\n",
+       "      <td>[-0.09342489820078766, 0.1563727417086737, -0....</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>16000000</td>\n",
+       "      <td>[Comedy, Drama, Romance]</td>\n",
+       "      <td>31357</td>\n",
+       "      <td>114885</td>\n",
+       "      <td>en</td>\n",
+       "      <td>Cheated on, mistreated and stepped on, the wom...</td>\n",
+       "      <td>3.859495</td>\n",
+       "      <td>819619200.0</td>\n",
+       "      <td>81452156</td>\n",
+       "      <td>127.0</td>\n",
+       "      <td>Released</td>\n",
+       "      <td>Friends are the people who let you be yourself...</td>\n",
+       "      <td>Waiting to Exhale</td>\n",
+       "      <td>6.1</td>\n",
+       "      <td>34</td>\n",
+       "      <td>4</td>\n",
+       "      <td>114885</td>\n",
+       "      <td>31357.0</td>\n",
+       "      <td>[-0.033617228695296826, 0.20003386580703916, 0...</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4</th>\n",
+       "      <td>0</td>\n",
+       "      <td>[Comedy]</td>\n",
+       "      <td>11862</td>\n",
+       "      <td>113041</td>\n",
+       "      <td>en</td>\n",
+       "      <td>Just when George Banks has recovered from his ...</td>\n",
+       "      <td>8.387519</td>\n",
+       "      <td>792403200.0</td>\n",
+       "      <td>76578911</td>\n",
+       "      <td>106.0</td>\n",
+       "      <td>Released</td>\n",
+       "      <td>Just When His World Is Back To Normal... He's ...</td>\n",
+       "      <td>Father of the Bride Part II</td>\n",
+       "      <td>5.7</td>\n",
+       "      <td>173</td>\n",
+       "      <td>5</td>\n",
+       "      <td>113041</td>\n",
+       "      <td>11862.0</td>\n",
+       "      <td>[0.03270775039139693, 0.16435040013526048, 0.0...</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "     budget                        genres     id  imdb_id original_language  \\\n",
+       "0  30000000   [Animation, Comedy, Family]    862   114709                en   \n",
+       "1  65000000  [Adventure, Fantasy, Family]   8844   113497                en   \n",
+       "2         0             [Romance, Comedy]  15602   113228                en   \n",
+       "3  16000000      [Comedy, Drama, Romance]  31357   114885                en   \n",
+       "4         0                      [Comedy]  11862   113041                en   \n",
+       "\n",
+       "                                            overview  popularity  \\\n",
+       "0  Led by Woody, Andy's toys live happily in his ...   21.946943   \n",
+       "1  When siblings Judy and Peter discover an encha...   17.015539   \n",
+       "2  A family wedding reignites the ancient feud be...   11.712900   \n",
+       "3  Cheated on, mistreated and stepped on, the wom...    3.859495   \n",
+       "4  Just when George Banks has recovered from his ...    8.387519   \n",
+       "\n",
+       "   release_date    revenue  runtime    status  \\\n",
+       "0   815040000.0  373554033     81.0  Released   \n",
+       "1   819014400.0  262797249    104.0  Released   \n",
+       "2   819619200.0          0    101.0  Released   \n",
+       "3   819619200.0   81452156    127.0  Released   \n",
+       "4   792403200.0   76578911    106.0  Released   \n",
+       "\n",
+       "                                             tagline  \\\n",
+       "0                                                      \n",
+       "1          Roll the dice and unleash the excitement!   \n",
+       "2  Still Yelling. Still Fighting. Still Ready for...   \n",
+       "3  Friends are the people who let you be yourself...   \n",
+       "4  Just When His World Is Back To Normal... He's ...   \n",
+       "\n",
+       "                         title  vote_average  vote_count movieId  imdbId  \\\n",
+       "0                    Toy Story           7.7        5415       1  114709   \n",
+       "1                      Jumanji           6.9        2413       2  113497   \n",
+       "2             Grumpier Old Men           6.5          92       3  113228   \n",
+       "3            Waiting to Exhale           6.1          34       4  114885   \n",
+       "4  Father of the Bride Part II           5.7         173       5  113041   \n",
+       "\n",
+       "    tmdbId                                       movie_vector  \n",
+       "0    862.0  [-0.09139158006123944, 0.3113782797006747, -0....  \n",
+       "1   8844.0  [-0.5145776514053282, 0.18805717045856102, 0.0...  \n",
+       "2  15602.0  [-0.09342489820078766, 0.1563727417086737, -0....  \n",
+       "3  31357.0  [-0.033617228695296826, 0.20003386580703916, 0...  \n",
+       "4  11862.0  [0.03270775039139693, 0.16435040013526048, 0.0...  "
+      ]
+     },
+     "execution_count": 12,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
    "source": [
     "# build a dataframe out of the user vectors and their userIds\n",
     "user_vectors_and_ids = {train_set.to_raw_uid(inner_id): user_vectors[inner_id].tolist() for inner_id in train_set.all_users()}\n",
@@ -662,7 +843,9 @@
     "movie_vector_df = pd.Series(movie_vectors_and_ids).to_frame('movie_vector')\n",
     "\n",
     "# merge the movie vector series with the movies dataframe using the movieId and id fields\n",
-    "movies_df = movies_df.merge(movie_vector_df, left_on='movieId', right_index=True, how='inner')\n"
+    "movies_df = movies_df.merge(movie_vector_df, left_on='movieId', right_index=True, how='inner')\n",
+    "movies_df['movieId'] = movies_df['movieId'].apply(lambda x: str(x)) # need to cast to a string as this is a tag field in our search schema\n",
+    "movies_df.head()"
    ]
   },
   {
@@ -684,8 +867,7 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "12:30:31 redisvl.index.index INFO   Index already exists, overwriting.\n",
-      "12:30:31 redisvl.index.index INFO   Index already exists, overwriting.\n"
+      "15:08:18 redisvl.index.index INFO   Index already exists, overwriting.\n"
      ]
     }
    ],
@@ -699,12 +881,7 @@
     "movie_schema = IndexSchema.from_yaml(\"collaborative_filtering_schema.yaml\")\n",
     "\n",
     "movie_index = SearchIndex(movie_schema, redis_client=client)\n",
-    "movie_index.create(overwrite=True, drop=True)\n",
-    "\n",
-    "user_schema = IndexSchema.from_yaml(\"user_schema.yaml\")\n",
-    "\n",
-    "user_index = SearchIndex(user_schema, redis_client=client)\n",
-    "user_index.create(overwrite=True, drop=True)"
+    "movie_index.create(overwrite=True, drop=True)"
    ]
   },
   {
@@ -725,10 +902,10 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "number of movies 8348\n",
-      "size of movie df 8348\n",
-      "unique movie ids 8342\n",
-      "unique movie titles 8109\n",
+      "number of movies 8334\n",
+      "size of movie df 8334\n",
+      "unique movie ids 8327\n",
+      "unique movie titles 8082\n",
       "unique movies rated 9065\n"
      ]
     },
@@ -795,7 +972,7 @@
        "      <td>1</td>\n",
        "      <td>114709</td>\n",
        "      <td>862.0</td>\n",
-       "      <td>[0.16217072665688012, 0.245026260806211, -0.14...</td>\n",
+       "      <td>[-0.09139158006123944, 0.3113782797006747, -0....</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>1</th>\n",
@@ -817,7 +994,7 @@
        "      <td>2</td>\n",
        "      <td>113497</td>\n",
        "      <td>8844.0</td>\n",
-       "      <td>[-0.0495065883180616, 0.017243236163025016, -0...</td>\n",
+       "      <td>[-0.5145776514053282, 0.18805717045856102, 0.0...</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>2</th>\n",
@@ -839,7 +1016,7 @@
        "      <td>3</td>\n",
        "      <td>113228</td>\n",
        "      <td>15602.0</td>\n",
-       "      <td>[0.07067590986084793, 0.20963299716890343, 0.2...</td>\n",
+       "      <td>[-0.09342489820078766, 0.1563727417086737, -0....</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>3</th>\n",
@@ -861,7 +1038,7 @@
        "      <td>4</td>\n",
        "      <td>114885</td>\n",
        "      <td>31357.0</td>\n",
-       "      <td>[-0.023481240586441465, 0.1194581665494643, -0...</td>\n",
+       "      <td>[-0.033617228695296826, 0.20003386580703916, 0...</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>4</th>\n",
@@ -883,7 +1060,7 @@
        "      <td>5</td>\n",
        "      <td>113041</td>\n",
        "      <td>11862.0</td>\n",
-       "      <td>[0.07510781660794685, 0.19069717883675757, -0....</td>\n",
+       "      <td>[0.03270775039139693, 0.16435040013526048, 0.0...</td>\n",
        "    </tr>\n",
        "  </tbody>\n",
        "</table>\n",
@@ -918,19 +1095,19 @@
        "3  Friends are the people who let you be yourself...   \n",
        "4  Just When His World Is Back To Normal... He's ...   \n",
        "\n",
-       "                         title  vote_average  vote_count  movieId  imdbId  \\\n",
-       "0                    Toy Story           7.7        5415        1  114709   \n",
-       "1                      Jumanji           6.9        2413        2  113497   \n",
-       "2             Grumpier Old Men           6.5          92        3  113228   \n",
-       "3            Waiting to Exhale           6.1          34        4  114885   \n",
-       "4  Father of the Bride Part II           5.7         173        5  113041   \n",
+       "                         title  vote_average  vote_count movieId  imdbId  \\\n",
+       "0                    Toy Story           7.7        5415       1  114709   \n",
+       "1                      Jumanji           6.9        2413       2  113497   \n",
+       "2             Grumpier Old Men           6.5          92       3  113228   \n",
+       "3            Waiting to Exhale           6.1          34       4  114885   \n",
+       "4  Father of the Bride Part II           5.7         173       5  113041   \n",
        "\n",
        "    tmdbId                                       movie_vector  \n",
-       "0    862.0  [0.16217072665688012, 0.245026260806211, -0.14...  \n",
-       "1   8844.0  [-0.0495065883180616, 0.017243236163025016, -0...  \n",
-       "2  15602.0  [0.07067590986084793, 0.20963299716890343, 0.2...  \n",
-       "3  31357.0  [-0.023481240586441465, 0.1194581665494643, -0...  \n",
-       "4  11862.0  [0.07510781660794685, 0.19069717883675757, -0....  "
+       "0    862.0  [-0.09139158006123944, 0.3113782797006747, -0....  \n",
+       "1   8844.0  [-0.5145776514053282, 0.18805717045856102, 0.0...  \n",
+       "2  15602.0  [-0.09342489820078766, 0.1563727417086737, -0....  \n",
+       "3  31357.0  [-0.033617228695296826, 0.20003386580703916, 0...  \n",
+       "4  11862.0  [0.03270775039139693, 0.16435040013526048, 0.0...  "
       ]
      },
      "execution_count": 15,
@@ -939,7 +1116,7 @@
     }
    ],
    "source": [
-    "# sanity check we merged all my dataframes properly and have the right sizes of moives, users, vectors, ids, etc.\n",
+    "# sanity check we merged all dataframes properly and have the right sizes of movies, users, vectors, ids, etc.\n",
     "number_of_movies = len(movies_df.to_dict(orient='records'))\n",
     "size_of_movie_df = movies_df.shape[0]\n",
     "\n",
@@ -977,26 +1154,18 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "{'id': 'movie:b5b8331ab3044a35bed03e7208dd7079', 'vector_distance': '-3.64372396469', 'title': 'The Lord of the Rings: The Fellowship of the Ring', 'genres': '[\"Adventure\",\"Fantasy\",\"Action\"]'}\n",
-      "{'id': 'movie:a6e4d98e9bd34503aa0ece6924cdb4c9', 'vector_distance': '-3.59878540039', 'title': 'The Dark Knight', 'genres': '[\"Drama\",\"Action\",\"Crime\",\"Thriller\"]'}\n",
-      "{'id': 'movie:51e5e1fbd6d940d894122e98fc638e85', 'vector_distance': '-3.59825658798', 'title': '12 Angry Men', 'genres': '[\"Drama\"]'}\n",
-      "{'id': 'movie:876f2cc13807471ba1dbaedbd92303c8', 'vector_distance': '-3.59230089188', 'title': 'Leon: The Professional', 'genres': '[\"Thriller\",\"Crime\",\"Drama\"]'}\n",
-      "{'id': 'movie:05a21bd00b8b4ed6938fc52cb532a601', 'vector_distance': '-3.54890108109', 'title': 'The Matrix', 'genres': '[\"Action\",\"Science Fiction\"]'}\n",
-      "{'id': 'movie:4644b98a4a21435981928b2d2871089d', 'vector_distance': '-3.4690322876', 'title': 'Band of Brothers', 'genres': '[\"Action\",\"Drama\",\"War\"]'}\n",
-      "{'id': 'movie:b096f81674a24f00a24cd9289d777913', 'vector_distance': '-3.46432924271', 'title': 'Memento', 'genres': '[\"Mystery\",\"Thriller\"]'}\n",
-      "{'id': 'movie:6861752bd166469aa89ab1ca69950ee8', 'vector_distance': '-3.4593539238', 'title': 'The Princess Bride', 'genres': '[\"Adventure\",\"Family\",\"Fantasy\",\"Comedy\",\"Romance\"]'}\n",
-      "{'id': 'movie:44911da20d8e4ad3a93a8452a3438feb', 'vector_distance': '-3.44543361664', 'title': 'American History X', 'genres': '[\"Drama\"]'}\n",
-      "{'id': 'movie:6f04f57431894700b9d23b60ebf8fac0', 'vector_distance': '-3.44274091721', 'title': 'Interstellar', 'genres': '[\"Adventure\",\"Drama\",\"Science Fiction\"]'}\n",
-      "{'id': 'movie:b2f480a4c97d404d8a5caa248b59a0e3', 'vector_distance': '-3.43494272232', 'title': 'Cinema Paradiso', 'genres': '[\"Drama\",\"Romance\"]'}\n",
-      "{'id': 'movie:367a0ad625564683b8818edc82b3878b', 'vector_distance': '-3.42747116089', 'title': 'The Prestige', 'genres': '[\"Drama\",\"Mystery\",\"Thriller\"]'}\n",
-      "{'id': 'movie:ec5ccd6e24fa470eae864f1ed3f7c566', 'vector_distance': '-3.42468452454', 'title': 'The African Queen', 'genres': '[\"Adventure\",\"War\",\"Romance\"]'}\n",
-      "{'id': 'movie:415b17e8f1a64ec0b7819068ae0ebc2d', 'vector_distance': '-3.4210562706', 'title': 'Happiness', 'genres': '[\"Comedy\",\"Drama\"]'}\n",
-      "{'id': 'movie:81802cdb06684b7fb3c26e754cb0bc50', 'vector_distance': '-3.41307687759', 'title': 'Forrest Gump', 'genres': '[\"Comedy\",\"Drama\",\"Romance\"]'}\n",
-      "{'id': 'movie:ce087387efd84691bab6a4228289ca47', 'vector_distance': '-3.40901231766', 'title': 'Thank You for Smoking', 'genres': '[\"Comedy\",\"Drama\"]'}\n",
-      "{'id': 'movie:846793c4d43e4f3a9c7536682e169789', 'vector_distance': '-3.38840723038', 'title': 'A Close Shave', 'genres': '[\"Family\",\"Animation\",\"Comedy\"]'}\n",
-      "{'id': 'movie:8e370a42277a43859a19cef8223549b2', 'vector_distance': '-3.38426446915', 'title': 'Up', 'genres': '[\"Animation\",\"Comedy\",\"Family\",\"Adventure\"]'}\n",
-      "{'id': 'movie:01c1de9dd23046f1ba630daf295b91a1', 'vector_distance': '-3.36946439743', 'title': 'Sin City', 'genres': '[\"Action\",\"Thriller\",\"Crime\"]'}\n",
-      "{'id': 'movie:a60d16b71e874eb99fad5461dc48b034', 'vector_distance': '-3.363966465', 'title': 'The Departed', 'genres': '[\"Drama\",\"Thriller\",\"Crime\"]'}\n"
+      "{'id': 'movie:879bc7ef3bac4639a76b8d39eb22ae25', 'vector_distance': '-3.82712745667', 'title': 'Star Wars', 'genres': '[\"Adventure\",\"Action\",\"Science Fiction\"]'}\n",
+      "{'id': 'movie:db33b0943cf942a5b410e71669c2f47f', 'vector_distance': '-3.75959968567', 'title': 'The Lord of the Rings: The Fellowship of the Ring', 'genres': '[\"Adventure\",\"Fantasy\",\"Action\"]'}\n",
+      "{'id': 'movie:994ee37edbfe42efaf023ba750e43b08', 'vector_distance': '-3.74698734283', 'title': 'The Lord of the Rings: The Two Towers', 'genres': '[\"Adventure\",\"Fantasy\",\"Action\"]'}\n",
+      "{'id': 'movie:1e0c3f95110e48d4bacefcb38817448f', 'vector_distance': '-3.736120224', 'title': 'The Empire Strikes Back', 'genres': '[\"Adventure\",\"Action\",\"Science Fiction\"]'}\n",
+      "{'id': 'movie:43b966cec4454bc4a59949f037ab5d80', 'vector_distance': '-3.55685997009', 'title': 'The Shawshank Redemption', 'genres': '[\"Drama\",\"Crime\"]'}\n",
+      "{'id': 'movie:1a42cf127f7c422e9542934c560dccf3', 'vector_distance': '-3.4610490799', 'title': 'The Lord of the Rings: The Return of the King', 'genres': '[\"Adventure\",\"Fantasy\",\"Action\"]'}\n",
+      "{'id': 'movie:720bb022230c4755ac5e039043ea63a5', 'vector_distance': '-3.3770198822', 'title': 'Return of the Jedi', 'genres': '[\"Adventure\",\"Action\",\"Science Fiction\"]'}\n",
+      "{'id': 'movie:0300dfccc16d48e69ff6b2a8f89f3307', 'vector_distance': '-3.32906913757', 'title': 'Shrek', 'genres': '[\"Adventure\",\"Animation\",\"Comedy\",\"Family\",\"Fantasy\"]'}\n",
+      "{'id': 'movie:1658113a6de2434ab52cdd0050ac87d9', 'vector_distance': '-3.3269367218', 'title': 'The Sixth Sense', 'genres': '[\"Mystery\",\"Thriller\",\"Drama\"]'}\n",
+      "{'id': 'movie:eb1d3a76e17447558ba10b331acb3f93', 'vector_distance': '-3.3075504303', 'title': 'Raiders of the Lost Ark', 'genres': '[\"Adventure\",\"Action\"]'}\n",
+      "{'id': 'movie:fca0a88760ba4dcbb852d13d1c862aa8', 'vector_distance': '-3.29939317703', 'title': 'The Silence of the Lambs', 'genres': '[\"Crime\",\"Drama\",\"Thriller\"]'}\n",
+      "{'id': 'movie:a930a67907ef449eaa5dda12af79f5af', 'vector_distance': '-3.25780773163', 'title': 'Lock, Stock and Two Smoking Barrels', 'genres': '[\"Comedy\",\"Crime\"]'}\n"
      ]
     }
    ],
@@ -1009,7 +1178,7 @@
     "# this is what we want. The predicted rating on a scale of 0 to 5 is then -(score - 1) == -score + 1\n",
     "query = RangeQuery(vector=user_vector,\n",
     "                    vector_field_name='movie_vector',\n",
-    "                    num_results=20,\n",
+    "                    num_results=12,\n",
     "                    return_score=True,\n",
     "                    return_fields=['title', 'genres']\n",
     "                    )\n",
@@ -1106,134 +1275,134 @@
        "  <tbody>\n",
        "    <tr>\n",
        "      <th>0</th>\n",
-       "      <td>The Professional</td>\n",
-       "      <td>One Flew Over the Cuckoo's Nest</td>\n",
-       "      <td>The Professional</td>\n",
-       "      <td>The Dark Knight</td>\n",
-       "      <td>The Professional</td>\n",
-       "      <td>A Clockwork Orange</td>\n",
+       "      <td>The Fugitive</td>\n",
+       "      <td>The Fugitive</td>\n",
+       "      <td>The Philadelphia Story</td>\n",
+       "      <td>The Shawshank Redemption</td>\n",
+       "      <td>Shine</td>\n",
+       "      <td>What's Eating Gilbert Grape</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>1</th>\n",
-       "      <td>One Flew Over the Cuckoo's Nest</td>\n",
-       "      <td>The Godfather</td>\n",
-       "      <td>One Flew Over the Cuckoo's Nest</td>\n",
-       "      <td>The Shawshank Redemption</td>\n",
-       "      <td>Seven Samurai</td>\n",
-       "      <td>Pineapple Express</td>\n",
+       "      <td>Terminator 2: Judgment Day</td>\n",
+       "      <td>Terminator 2: Judgment Day</td>\n",
+       "      <td>Butch Cassidy and the Sundance Kid</td>\n",
+       "      <td>Pulp Fiction</td>\n",
+       "      <td>La Haine</td>\n",
+       "      <td>The Grapes of Wrath</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>2</th>\n",
-       "      <td>The Godfather</td>\n",
-       "      <td>Leon: The Professional</td>\n",
-       "      <td>The Godfather</td>\n",
-       "      <td>Pulp Fiction</td>\n",
-       "      <td>Akira</td>\n",
-       "      <td>James and the Giant Peach</td>\n",
+       "      <td>Clear and Present Danger</td>\n",
+       "      <td>Clear and Present Danger</td>\n",
+       "      <td>Star Wars</td>\n",
+       "      <td>Blade Runner</td>\n",
+       "      <td>Castle in the Sky</td>\n",
+       "      <td>Pineapple Express</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>3</th>\n",
-       "      <td>Leon: The Professional</td>\n",
-       "      <td>The Godfather: Part II</td>\n",
-       "      <td>The Godfather: Part II</td>\n",
-       "      <td>Fight Club</td>\n",
-       "      <td>The Postman</td>\n",
-       "      <td>The Grapes of Wrath</td>\n",
+       "      <td>The Silence of the Lambs</td>\n",
+       "      <td>The Silence of the Lambs</td>\n",
+       "      <td>The Bridge on the River Kwai</td>\n",
+       "      <td>The Dark Knight</td>\n",
+       "      <td>All About Eve</td>\n",
+       "      <td>A Clockwork Orange</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>4</th>\n",
-       "      <td>The Godfather: Part II</td>\n",
-       "      <td>The Dark Knight</td>\n",
-       "      <td>A Clockwork Orange</td>\n",
-       "      <td>Big Hero 6</td>\n",
-       "      <td>Cube</td>\n",
-       "      <td>Bananas</td>\n",
+       "      <td>Schindler's List</td>\n",
+       "      <td>Schindler's List</td>\n",
+       "      <td>The Treasure of the Sierra Madre</td>\n",
+       "      <td>Whiplash</td>\n",
+       "      <td>M</td>\n",
+       "      <td>James and the Giant Peach</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>5</th>\n",
-       "      <td>A Clockwork Orange</td>\n",
-       "      <td>Fargo</td>\n",
-       "      <td>The African Queen</td>\n",
-       "      <td>Blade Runner</td>\n",
-       "      <td>Castle in the Sky</td>\n",
-       "      <td>What's Eating Gilbert Grape</td>\n",
+       "      <td>Se7en</td>\n",
+       "      <td>Se7en</td>\n",
+       "      <td>A Christmas Story</td>\n",
+       "      <td>Fight Club</td>\n",
+       "      <td>The Contender</td>\n",
+       "      <td>Bananas</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>6</th>\n",
-       "      <td>Let the Right One In</td>\n",
-       "      <td>The Graduate</td>\n",
-       "      <td>Cool Hand Luke</td>\n",
-       "      <td>Gone Girl</td>\n",
-       "      <td>M</td>\n",
+       "      <td>The Philadelphia Story</td>\n",
+       "      <td>Speed</td>\n",
+       "      <td>The Little Mermaid</td>\n",
+       "      <td>Big Hero 6</td>\n",
+       "      <td>The Postman</td>\n",
        "      <td>The Apple Dumpling Gang</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>7</th>\n",
-       "      <td>The African Queen</td>\n",
-       "      <td>Eternal Sunshine of the Spotless Mind</td>\n",
-       "      <td>The Graduate</td>\n",
-       "      <td>Whiplash</td>\n",
-       "      <td>Once Upon a Time in America</td>\n",
-       "      <td>Soldier of Orange</td>\n",
+       "      <td>A Close Shave</td>\n",
+       "      <td>Fargo</td>\n",
+       "      <td>Roger &amp; Me</td>\n",
+       "      <td>The Avengers</td>\n",
+       "      <td>Maverick</td>\n",
+       "      <td>Herbie Goes Bananas</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>8</th>\n",
-       "      <td>Cool Hand Luke</td>\n",
-       "      <td>Inception</td>\n",
-       "      <td>12 Angry Men</td>\n",
-       "      <td>The Avengers</td>\n",
-       "      <td>All Quiet on the Western Front</td>\n",
+       "      <td>The Usual Suspects</td>\n",
+       "      <td>Amélie</td>\n",
+       "      <td>Dead Poets Society</td>\n",
+       "      <td>Gone Girl</td>\n",
+       "      <td>The Meaning of Life</td>\n",
        "      <td>Orange County</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>9</th>\n",
-       "      <td>The Dark Knight</td>\n",
-       "      <td>Boyz n the Hood</td>\n",
-       "      <td>The Treasure of the Sierra Madre</td>\n",
+       "      <td>Speed</td>\n",
+       "      <td>Jurassic Park</td>\n",
+       "      <td>Stand by Me</td>\n",
        "      <td>Guardians of the Galaxy</td>\n",
-       "      <td>Aguirre: The Wrath of God</td>\n",
-       "      <td>Herbie Goes Bananas</td>\n",
+       "      <td>Frost/Nixon</td>\n",
+       "      <td>The Apple Dumpling Gang Rides Again</td>\n",
        "    </tr>\n",
        "  </tbody>\n",
        "</table>\n",
        "</div>"
       ],
       "text/plain": [
-       "                         top picks                          block busters  \\\n",
-       "0                 The Professional        One Flew Over the Cuckoo's Nest   \n",
-       "1  One Flew Over the Cuckoo's Nest                          The Godfather   \n",
-       "2                    The Godfather                 Leon: The Professional   \n",
-       "3           Leon: The Professional                 The Godfather: Part II   \n",
-       "4           The Godfather: Part II                        The Dark Knight   \n",
-       "5               A Clockwork Orange                                  Fargo   \n",
-       "6             Let the Right One In                           The Graduate   \n",
-       "7                The African Queen  Eternal Sunshine of the Spotless Mind   \n",
-       "8                   Cool Hand Luke                              Inception   \n",
-       "9                  The Dark Knight                        Boyz n the Hood   \n",
+       "                    top picks               block busters  \\\n",
+       "0                The Fugitive                The Fugitive   \n",
+       "1  Terminator 2: Judgment Day  Terminator 2: Judgment Day   \n",
+       "2    Clear and Present Danger    Clear and Present Danger   \n",
+       "3    The Silence of the Lambs    The Silence of the Lambs   \n",
+       "4            Schindler's List            Schindler's List   \n",
+       "5                       Se7en                       Se7en   \n",
+       "6      The Philadelphia Story                       Speed   \n",
+       "7               A Close Shave                       Fargo   \n",
+       "8          The Usual Suspects                      Amélie   \n",
+       "9                       Speed               Jurassic Park   \n",
        "\n",
-       "                           classics            what's popular  \\\n",
-       "0                  The Professional           The Dark Knight   \n",
-       "1   One Flew Over the Cuckoo's Nest  The Shawshank Redemption   \n",
-       "2                     The Godfather              Pulp Fiction   \n",
-       "3            The Godfather: Part II                Fight Club   \n",
-       "4                A Clockwork Orange                Big Hero 6   \n",
-       "5                 The African Queen              Blade Runner   \n",
-       "6                    Cool Hand Luke                 Gone Girl   \n",
-       "7                      The Graduate                  Whiplash   \n",
-       "8                      12 Angry Men              The Avengers   \n",
-       "9  The Treasure of the Sierra Madre   Guardians of the Galaxy   \n",
+       "                             classics            what's popular  \\\n",
+       "0              The Philadelphia Story  The Shawshank Redemption   \n",
+       "1  Butch Cassidy and the Sundance Kid              Pulp Fiction   \n",
+       "2                           Star Wars              Blade Runner   \n",
+       "3        The Bridge on the River Kwai           The Dark Knight   \n",
+       "4    The Treasure of the Sierra Madre                  Whiplash   \n",
+       "5                   A Christmas Story                Fight Club   \n",
+       "6                  The Little Mermaid                Big Hero 6   \n",
+       "7                          Roger & Me              The Avengers   \n",
+       "8                  Dead Poets Society                 Gone Girl   \n",
+       "9                         Stand by Me   Guardians of the Galaxy   \n",
        "\n",
-       "                       indie hits                 fruity films  \n",
-       "0                The Professional           A Clockwork Orange  \n",
-       "1                   Seven Samurai            Pineapple Express  \n",
-       "2                           Akira    James and the Giant Peach  \n",
-       "3                     The Postman          The Grapes of Wrath  \n",
-       "4                            Cube                      Bananas  \n",
-       "5               Castle in the Sky  What's Eating Gilbert Grape  \n",
-       "6                               M      The Apple Dumpling Gang  \n",
-       "7     Once Upon a Time in America            Soldier of Orange  \n",
-       "8  All Quiet on the Western Front                Orange County  \n",
-       "9       Aguirre: The Wrath of God          Herbie Goes Bananas  "
+       "            indie hits                         fruity films  \n",
+       "0                Shine          What's Eating Gilbert Grape  \n",
+       "1             La Haine                  The Grapes of Wrath  \n",
+       "2    Castle in the Sky                    Pineapple Express  \n",
+       "3        All About Eve                   A Clockwork Orange  \n",
+       "4                    M            James and the Giant Peach  \n",
+       "5        The Contender                              Bananas  \n",
+       "6          The Postman              The Apple Dumpling Gang  \n",
+       "7             Maverick                  Herbie Goes Bananas  \n",
+       "8  The Meaning of Life                        Orange County  \n",
+       "9          Frost/Nixon  The Apple Dumpling Gang Rides Again  "
       ]
      },
      "execution_count": 18,
@@ -1242,7 +1411,7 @@
     }
    ],
    "source": [
-    "# put all these titles into a single pandas dataframe , where each column is one category\n",
+    "# put all these titles into a single pandas dataframe, where each column is one category\n",
     "all_recommendations = pd.DataFrame(columns=[\"top picks\", \"block busters\", \"classics\", \"what's popular\", \"indie hits\", \"fruity films\"])\n",
     "all_recommendations[\"top picks\"] = [m[0] for m in Top_picks_for_you]\n",
     "all_recommendations[\"block busters\"] = [m[0] for m in block_buster_hits]\n",
@@ -1268,15 +1437,7 @@
    "cell_type": "code",
    "execution_count": 19,
    "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "BFCommands.exists() missing 1 required positional argument: 'item'\n"
-     ]
-    }
-   ],
+   "outputs": [],
    "source": [
     "# rewrite the get_recommendations() function to use a bloom filter and apply it before we return results\n",
     "def get_unique_recommendations(user_id, filters=None, num_results=10):\n",
@@ -1287,37 +1448,38 @@
     "\n",
     "    query = RangeQuery(vector=user_vector,\n",
     "                       vector_field_name='movie_vector',\n",
-    "                       num_results=num_results * 2,  # fetch more results to filter out watched movies\n",
+    "                       num_results=num_results * 5,  # fetch more results to filter out watched movies\n",
     "                       filter_expression=filters,\n",
-    "                       return_fields=['title', '$.movie_id', '$.movieId', 'imdb_id', 'imdbId','overview', 'genres'],\n",
+    "                       return_fields=['title', 'overview', 'genres', 'movieId'],\n",
     "    ) # TODO figure out why i need to add '$.' to some fields, but not others\n",
     "    results = movie_index.query(query)\n",
     "\n",
     "    # filter out movies that the user has already watched\n",
     "    recommendations = []\n",
     "    for r in results:\n",
-    "        if not client.bf().exists('user_watched_list', r['$.movieId']):\n",
+    "        if not client.bf().exists('user_watched_list', f\"{user_id}:{r['movieId']}\"):\n",
     "            recommendations.append((r['title'], r['overview'], r['genres'], r['vector_distance']))\n",
     "        if len(recommendations) >= num_results:\n",
     "            break\n",
+    "\n",
+    "    # add these films to the bloom filter\n",
+    "    client.bf().insert('user_watched_list', [f\"{user_id}:{r['movieId']}\" for r  in results])\n",
     "    return recommendations\n",
     "\n",
     "# example usage\n",
     "# create a bloom filter for this user\n",
     "try:\n",
-    "    client.bf().exists(f\"user_watched_list\")\n",
+    "    client.bf().create(f\"user_watched_list\", 0.01, 10000)\n",
     "except Exception as e:\n",
     "    print(e)\n",
-    "    pass\n",
     "\n",
     "user_id = 42\n",
     "\n",
-    "top_picks_for_you = get_unique_recommendations(user_id=user_id)  # general SVD results, no filter\n",
-    "block_buster_hits = get_unique_recommendations(user_id=user_id, filters=block_buster_filter)\n",
-    "classics = get_unique_recommendations(user_id=user_id, filters=classics_filter)\n",
-    "whats_popular = get_unique_recommendations(user_id=user_id, filters=popular_filter)\n",
-    "indie_hits = get_unique_recommendations(user_id=user_id, filters=indie_filter)\n",
-    "fruity_films = get_unique_recommendations(user_id=user_id, filters=fruity)"
+    "top_picks_for_you = get_unique_recommendations(user_id=user_id, num_results=5)  # general SVD results, no filter\n",
+    "block_buster_hits = get_unique_recommendations(user_id=user_id, filters=block_buster_filter, num_results=5)\n",
+    "classics = get_unique_recommendations(user_id=user_id, filters=classics_filter, num_results=5)\n",
+    "whats_popular = get_unique_recommendations(user_id=user_id, filters=popular_filter, num_results=5)\n",
+    "indie_hits = get_unique_recommendations(user_id=user_id, filters=indie_filter, num_results=5)"
    ]
   },
   {
@@ -1355,140 +1517,67 @@
        "      <th>classics</th>\n",
        "      <th>what's popular</th>\n",
        "      <th>indie hits</th>\n",
-       "      <th>fruity films</th>\n",
        "    </tr>\n",
        "  </thead>\n",
        "  <tbody>\n",
        "    <tr>\n",
        "      <th>0</th>\n",
-       "      <td>The Professional</td>\n",
-       "      <td>One Flew Over the Cuckoo's Nest</td>\n",
-       "      <td>The Professional</td>\n",
-       "      <td>The Dark Knight</td>\n",
-       "      <td>The Professional</td>\n",
-       "      <td>A Clockwork Orange</td>\n",
+       "      <td>The Silence of the Lambs</td>\n",
+       "      <td>Star Trek: Generations</td>\n",
+       "      <td>The Bridge on the River Kwai</td>\n",
+       "      <td>Blade Runner</td>\n",
+       "      <td>Shine</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>1</th>\n",
-       "      <td>One Flew Over the Cuckoo's Nest</td>\n",
-       "      <td>The Godfather</td>\n",
-       "      <td>One Flew Over the Cuckoo's Nest</td>\n",
-       "      <td>The Shawshank Redemption</td>\n",
-       "      <td>Seven Samurai</td>\n",
-       "      <td>Pineapple Express</td>\n",
+       "      <td>Se7en</td>\n",
+       "      <td>Jumanji</td>\n",
+       "      <td>The Treasure of the Sierra Madre</td>\n",
+       "      <td>Whiplash</td>\n",
+       "      <td>La Haine</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>2</th>\n",
-       "      <td>The Godfather</td>\n",
-       "      <td>Leon: The Professional</td>\n",
-       "      <td>The Godfather</td>\n",
-       "      <td>Pulp Fiction</td>\n",
-       "      <td>Akira</td>\n",
-       "      <td>James and the Giant Peach</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>3</th>\n",
-       "      <td>Leon: The Professional</td>\n",
-       "      <td>The Godfather: Part II</td>\n",
-       "      <td>The Godfather: Part II</td>\n",
+       "      <td>The Philadelphia Story</td>\n",
+       "      <td>Outbreak</td>\n",
+       "      <td>A Christmas Story</td>\n",
        "      <td>Fight Club</td>\n",
-       "      <td>The Postman</td>\n",
-       "      <td>The Grapes of Wrath</td>\n",
+       "      <td>Castle in the Sky</td>\n",
        "    </tr>\n",
        "    <tr>\n",
-       "      <th>4</th>\n",
-       "      <td>The Godfather: Part II</td>\n",
-       "      <td>The Dark Knight</td>\n",
-       "      <td>A Clockwork Orange</td>\n",
+       "      <th>3</th>\n",
+       "      <td>A Close Shave</td>\n",
+       "      <td>The Lion King</td>\n",
+       "      <td>The Little Mermaid</td>\n",
        "      <td>Big Hero 6</td>\n",
-       "      <td>Cube</td>\n",
-       "      <td>Bananas</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>5</th>\n",
-       "      <td>A Clockwork Orange</td>\n",
-       "      <td>Fargo</td>\n",
-       "      <td>The African Queen</td>\n",
-       "      <td>Blade Runner</td>\n",
-       "      <td>Castle in the Sky</td>\n",
-       "      <td>What's Eating Gilbert Grape</td>\n",
+       "      <td>All About Eve</td>\n",
        "    </tr>\n",
        "    <tr>\n",
-       "      <th>6</th>\n",
-       "      <td>Let the Right One In</td>\n",
-       "      <td>The Graduate</td>\n",
-       "      <td>Cool Hand Luke</td>\n",
+       "      <th>4</th>\n",
+       "      <td>The Usual Suspects</td>\n",
+       "      <td>Men in Black</td>\n",
+       "      <td>Roger &amp; Me</td>\n",
        "      <td>Gone Girl</td>\n",
        "      <td>M</td>\n",
-       "      <td>The Apple Dumpling Gang</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>7</th>\n",
-       "      <td>The African Queen</td>\n",
-       "      <td>Eternal Sunshine of the Spotless Mind</td>\n",
-       "      <td>The Graduate</td>\n",
-       "      <td>Whiplash</td>\n",
-       "      <td>Once Upon a Time in America</td>\n",
-       "      <td>Soldier of Orange</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>8</th>\n",
-       "      <td>Cool Hand Luke</td>\n",
-       "      <td>Inception</td>\n",
-       "      <td>12 Angry Men</td>\n",
-       "      <td>The Avengers</td>\n",
-       "      <td>All Quiet on the Western Front</td>\n",
-       "      <td>Orange County</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>9</th>\n",
-       "      <td>The Dark Knight</td>\n",
-       "      <td>Boyz n the Hood</td>\n",
-       "      <td>The Treasure of the Sierra Madre</td>\n",
-       "      <td>Guardians of the Galaxy</td>\n",
-       "      <td>Aguirre: The Wrath of God</td>\n",
-       "      <td>Herbie Goes Bananas</td>\n",
        "    </tr>\n",
        "  </tbody>\n",
        "</table>\n",
        "</div>"
       ],
       "text/plain": [
-       "                         top picks                          block busters  \\\n",
-       "0                 The Professional        One Flew Over the Cuckoo's Nest   \n",
-       "1  One Flew Over the Cuckoo's Nest                          The Godfather   \n",
-       "2                    The Godfather                 Leon: The Professional   \n",
-       "3           Leon: The Professional                 The Godfather: Part II   \n",
-       "4           The Godfather: Part II                        The Dark Knight   \n",
-       "5               A Clockwork Orange                                  Fargo   \n",
-       "6             Let the Right One In                           The Graduate   \n",
-       "7                The African Queen  Eternal Sunshine of the Spotless Mind   \n",
-       "8                   Cool Hand Luke                              Inception   \n",
-       "9                  The Dark Knight                        Boyz n the Hood   \n",
-       "\n",
-       "                           classics            what's popular  \\\n",
-       "0                  The Professional           The Dark Knight   \n",
-       "1   One Flew Over the Cuckoo's Nest  The Shawshank Redemption   \n",
-       "2                     The Godfather              Pulp Fiction   \n",
-       "3            The Godfather: Part II                Fight Club   \n",
-       "4                A Clockwork Orange                Big Hero 6   \n",
-       "5                 The African Queen              Blade Runner   \n",
-       "6                    Cool Hand Luke                 Gone Girl   \n",
-       "7                      The Graduate                  Whiplash   \n",
-       "8                      12 Angry Men              The Avengers   \n",
-       "9  The Treasure of the Sierra Madre   Guardians of the Galaxy   \n",
+       "                  top picks           block busters  \\\n",
+       "0  The Silence of the Lambs  Star Trek: Generations   \n",
+       "1                     Se7en                 Jumanji   \n",
+       "2    The Philadelphia Story                Outbreak   \n",
+       "3             A Close Shave           The Lion King   \n",
+       "4        The Usual Suspects            Men in Black   \n",
        "\n",
-       "                       indie hits                 fruity films  \n",
-       "0                The Professional           A Clockwork Orange  \n",
-       "1                   Seven Samurai            Pineapple Express  \n",
-       "2                           Akira    James and the Giant Peach  \n",
-       "3                     The Postman          The Grapes of Wrath  \n",
-       "4                            Cube                      Bananas  \n",
-       "5               Castle in the Sky  What's Eating Gilbert Grape  \n",
-       "6                               M      The Apple Dumpling Gang  \n",
-       "7     Once Upon a Time in America            Soldier of Orange  \n",
-       "8  All Quiet on the Western Front                Orange County  \n",
-       "9       Aguirre: The Wrath of God          Herbie Goes Bananas  "
+       "                           classics what's popular         indie hits  \n",
+       "0      The Bridge on the River Kwai   Blade Runner              Shine  \n",
+       "1  The Treasure of the Sierra Madre       Whiplash           La Haine  \n",
+       "2                 A Christmas Story     Fight Club  Castle in the Sky  \n",
+       "3                The Little Mermaid     Big Hero 6      All About Eve  \n",
+       "4                        Roger & Me      Gone Girl                  M  "
       ]
      },
      "execution_count": 20,
@@ -1498,13 +1587,12 @@
    ],
    "source": [
     "# put all these titles into a single pandas dataframe , where each column is one category\n",
-    "all_recommendations = pd.DataFrame(columns=[\"top picks\", \"block busters\", \"classics\", \"what's popular\", \"indie hits\", \"fruity films\"])\n",
+    "all_recommendations = pd.DataFrame(columns=[\"top picks\", \"block busters\", \"classics\", \"what's popular\", \"indie hits\"])\n",
     "all_recommendations[\"top picks\"] = [m[0] for m in top_picks_for_you]\n",
     "all_recommendations[\"block busters\"] = [m[0] for m in block_buster_hits]\n",
     "all_recommendations[\"classics\"] = [m[0] for m in classics]\n",
     "all_recommendations[\"what's popular\"] = [m[0] for m in whats_popular]\n",
     "all_recommendations[\"indie hits\"] = [m[0] for m in indie_hits]\n",
-    "all_recommendations[\"fruity films\"] = [m[0] for m in fruity_films]\n",
     "\n",
     "all_recommendations.head(10)"
    ]
@@ -1526,7 +1614,7 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Deleted 4348 keys\n",
+      "Deleted 4334 keys\n",
       "Deleted 2000 keys\n",
       "Deleted 1000 keys\n",
       "Deleted 500 keys\n",
@@ -1549,18 +1637,8 @@
     "while remaining := movie_index.clear():\n",
     "    print(f\"Deleted {remaining} keys\")\n",
     "\n",
-    "while remaining := user_index.clear():\n",
-    "    print(f\"Deleeted {remaining} keys\")\n",
-    "\n",
     "client.delete(\"user_watched_list\")"
    ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": []
   }
  ],
  "metadata": {
diff --git a/python-recipes/recommendation-systems/collaborative_filtering_schema.yaml b/python-recipes/recommendation-systems/collaborative_filtering_schema.yaml
index f10d686b..0a6f61b4 100644
--- a/python-recipes/recommendation-systems/collaborative_filtering_schema.yaml
+++ b/python-recipes/recommendation-systems/collaborative_filtering_schema.yaml
@@ -4,9 +4,9 @@ index:
     storage_type: json
 
 fields:
-    - name: genres
+    - name: movieId
       type: tag
-    - name: movie_id
+    - name: genres
       type: tag
     - name: original_language
       type: tag
diff --git a/python-recipes/recommendation-systems/user_schema.yaml b/python-recipes/recommendation-systems/user_schema.yaml
deleted file mode 100644
index 6d5c9ebd..00000000
--- a/python-recipes/recommendation-systems/user_schema.yaml
+++ /dev/null
@@ -1,20 +0,0 @@
-index:
-    name: users
-    prefix: user
-    storage_type: json
-
-fields:
-    - name: user_id
-      type: tag
-    - name: ratings
-      type: numeric
-      name: watched_list
-      type: text
-
-    - name: user_vector
-      type: vector
-      attrs:
-          dims: 100
-          distance_metric: ip
-          algorithm: flat
-          dtype: float32
\ No newline at end of file

From fcaffb45885f667b49e38357f90d1c621ebf5cfd Mon Sep 17 00:00:00 2001
From: Justin Cechmanek <justin.cechmanek@redis.com>
Date: Mon, 14 Oct 2024 15:34:40 -0700
Subject: [PATCH 07/12] replaces for loop bloom filter check with mexists()

---
 .../collaborative_filtering.ipynb             | 396 +++++++++---------
 1 file changed, 202 insertions(+), 194 deletions(-)

diff --git a/python-recipes/recommendation-systems/collaborative_filtering.ipynb b/python-recipes/recommendation-systems/collaborative_filtering.ipynb
index afa93fe4..09344699 100644
--- a/python-recipes/recommendation-systems/collaborative_filtering.ipynb
+++ b/python-recipes/recommendation-systems/collaborative_filtering.ipynb
@@ -180,7 +180,7 @@
     {
      "data": {
       "text/plain": [
-       "<surprise.prediction_algorithms.matrix_factorization.SVD at 0x1067e7d10>"
+       "<surprise.prediction_algorithms.matrix_factorization.SVD at 0x1052209d0>"
       ]
      },
      "execution_count": 5,
@@ -230,7 +230,7 @@
      "output_type": "stream",
      "text": [
       "we have 671 users with feature vectors of size 100\n",
-      "we have 8376 movies with feature vectors of size 100\n"
+      "we have 8377 movies with feature vectors of size 100\n"
      ]
     }
    ],
@@ -263,7 +263,7 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "the predicted rating of user 347 on movie 5515 is 1.2554222750662518\n"
+      "the predicted rating of user 347 on movie 5515 is 1.3640325071309123\n"
      ]
     }
    ],
@@ -289,7 +289,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 9,
+   "execution_count": 8,
    "metadata": {},
    "outputs": [
     {
@@ -529,7 +529,7 @@
        "[5 rows x 23 columns]"
       ]
      },
-     "execution_count": 9,
+     "execution_count": 8,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -541,7 +541,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 10,
+   "execution_count": 9,
    "metadata": {},
    "outputs": [
     {
@@ -565,7 +565,7 @@
        "dtype: int64"
       ]
      },
-     "execution_count": 10,
+     "execution_count": 9,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -606,7 +606,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 11,
+   "execution_count": 10,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -624,7 +624,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 12,
+   "execution_count": 11,
    "metadata": {},
    "outputs": [
     {
@@ -690,7 +690,7 @@
        "      <td>1</td>\n",
        "      <td>114709</td>\n",
        "      <td>862.0</td>\n",
-       "      <td>[-0.09139158006123944, 0.3113782797006747, -0....</td>\n",
+       "      <td>[0.03713469204683083, 0.10796564373254629, 0.2...</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>1</th>\n",
@@ -712,7 +712,7 @@
        "      <td>2</td>\n",
        "      <td>113497</td>\n",
        "      <td>8844.0</td>\n",
-       "      <td>[-0.5145776514053282, 0.18805717045856102, 0.0...</td>\n",
+       "      <td>[-0.010117012753361906, -0.03687474969254127, ...</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>2</th>\n",
@@ -734,7 +734,7 @@
        "      <td>3</td>\n",
        "      <td>113228</td>\n",
        "      <td>15602.0</td>\n",
-       "      <td>[-0.09342489820078766, 0.1563727417086737, -0....</td>\n",
+       "      <td>[0.13139654322372601, 0.14560140137289648, 0.1...</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>3</th>\n",
@@ -756,7 +756,7 @@
        "      <td>4</td>\n",
        "      <td>114885</td>\n",
        "      <td>31357.0</td>\n",
-       "      <td>[-0.033617228695296826, 0.20003386580703916, 0...</td>\n",
+       "      <td>[0.1564855291020289, -0.01096475924961168, 0.2...</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>4</th>\n",
@@ -778,7 +778,7 @@
        "      <td>5</td>\n",
        "      <td>113041</td>\n",
        "      <td>11862.0</td>\n",
-       "      <td>[0.03270775039139693, 0.16435040013526048, 0.0...</td>\n",
+       "      <td>[0.07205704581865023, 0.25224445082871455, 0.0...</td>\n",
        "    </tr>\n",
        "  </tbody>\n",
        "</table>\n",
@@ -821,14 +821,14 @@
        "4  Father of the Bride Part II           5.7         173       5  113041   \n",
        "\n",
        "    tmdbId                                       movie_vector  \n",
-       "0    862.0  [-0.09139158006123944, 0.3113782797006747, -0....  \n",
-       "1   8844.0  [-0.5145776514053282, 0.18805717045856102, 0.0...  \n",
-       "2  15602.0  [-0.09342489820078766, 0.1563727417086737, -0....  \n",
-       "3  31357.0  [-0.033617228695296826, 0.20003386580703916, 0...  \n",
-       "4  11862.0  [0.03270775039139693, 0.16435040013526048, 0.0...  "
+       "0    862.0  [0.03713469204683083, 0.10796564373254629, 0.2...  \n",
+       "1   8844.0  [-0.010117012753361906, -0.03687474969254127, ...  \n",
+       "2  15602.0  [0.13139654322372601, 0.14560140137289648, 0.1...  \n",
+       "3  31357.0  [0.1564855291020289, -0.01096475924961168, 0.2...  \n",
+       "4  11862.0  [0.07205704581865023, 0.25224445082871455, 0.0...  "
       ]
      },
-     "execution_count": 12,
+     "execution_count": 11,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -860,14 +860,14 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 13,
+   "execution_count": 12,
    "metadata": {},
    "outputs": [
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "15:08:18 redisvl.index.index INFO   Index already exists, overwriting.\n"
+      "15:33:21 redisvl.index.index INFO   Index already exists, overwriting.\n"
      ]
     }
    ],
@@ -886,7 +886,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 14,
+   "execution_count": 13,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -895,17 +895,17 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 15,
+   "execution_count": 14,
    "metadata": {},
    "outputs": [
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "number of movies 8334\n",
-      "size of movie df 8334\n",
-      "unique movie ids 8327\n",
-      "unique movie titles 8082\n",
+      "number of movies 8337\n",
+      "size of movie df 8337\n",
+      "unique movie ids 8331\n",
+      "unique movie titles 8100\n",
       "unique movies rated 9065\n"
      ]
     },
@@ -972,7 +972,7 @@
        "      <td>1</td>\n",
        "      <td>114709</td>\n",
        "      <td>862.0</td>\n",
-       "      <td>[-0.09139158006123944, 0.3113782797006747, -0....</td>\n",
+       "      <td>[0.03713469204683083, 0.10796564373254629, 0.2...</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>1</th>\n",
@@ -994,7 +994,7 @@
        "      <td>2</td>\n",
        "      <td>113497</td>\n",
        "      <td>8844.0</td>\n",
-       "      <td>[-0.5145776514053282, 0.18805717045856102, 0.0...</td>\n",
+       "      <td>[-0.010117012753361906, -0.03687474969254127, ...</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>2</th>\n",
@@ -1016,7 +1016,7 @@
        "      <td>3</td>\n",
        "      <td>113228</td>\n",
        "      <td>15602.0</td>\n",
-       "      <td>[-0.09342489820078766, 0.1563727417086737, -0....</td>\n",
+       "      <td>[0.13139654322372601, 0.14560140137289648, 0.1...</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>3</th>\n",
@@ -1038,7 +1038,7 @@
        "      <td>4</td>\n",
        "      <td>114885</td>\n",
        "      <td>31357.0</td>\n",
-       "      <td>[-0.033617228695296826, 0.20003386580703916, 0...</td>\n",
+       "      <td>[0.1564855291020289, -0.01096475924961168, 0.2...</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>4</th>\n",
@@ -1060,7 +1060,7 @@
        "      <td>5</td>\n",
        "      <td>113041</td>\n",
        "      <td>11862.0</td>\n",
-       "      <td>[0.03270775039139693, 0.16435040013526048, 0.0...</td>\n",
+       "      <td>[0.07205704581865023, 0.25224445082871455, 0.0...</td>\n",
        "    </tr>\n",
        "  </tbody>\n",
        "</table>\n",
@@ -1103,14 +1103,14 @@
        "4  Father of the Bride Part II           5.7         173       5  113041   \n",
        "\n",
        "    tmdbId                                       movie_vector  \n",
-       "0    862.0  [-0.09139158006123944, 0.3113782797006747, -0....  \n",
-       "1   8844.0  [-0.5145776514053282, 0.18805717045856102, 0.0...  \n",
-       "2  15602.0  [-0.09342489820078766, 0.1563727417086737, -0....  \n",
-       "3  31357.0  [-0.033617228695296826, 0.20003386580703916, 0...  \n",
-       "4  11862.0  [0.03270775039139693, 0.16435040013526048, 0.0...  "
+       "0    862.0  [0.03713469204683083, 0.10796564373254629, 0.2...  \n",
+       "1   8844.0  [-0.010117012753361906, -0.03687474969254127, ...  \n",
+       "2  15602.0  [0.13139654322372601, 0.14560140137289648, 0.1...  \n",
+       "3  31357.0  [0.1564855291020289, -0.01096475924961168, 0.2...  \n",
+       "4  11862.0  [0.07205704581865023, 0.25224445082871455, 0.0...  "
       ]
      },
-     "execution_count": 15,
+     "execution_count": 14,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -1147,25 +1147,25 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 16,
+   "execution_count": 15,
    "metadata": {},
    "outputs": [
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "{'id': 'movie:879bc7ef3bac4639a76b8d39eb22ae25', 'vector_distance': '-3.82712745667', 'title': 'Star Wars', 'genres': '[\"Adventure\",\"Action\",\"Science Fiction\"]'}\n",
-      "{'id': 'movie:db33b0943cf942a5b410e71669c2f47f', 'vector_distance': '-3.75959968567', 'title': 'The Lord of the Rings: The Fellowship of the Ring', 'genres': '[\"Adventure\",\"Fantasy\",\"Action\"]'}\n",
-      "{'id': 'movie:994ee37edbfe42efaf023ba750e43b08', 'vector_distance': '-3.74698734283', 'title': 'The Lord of the Rings: The Two Towers', 'genres': '[\"Adventure\",\"Fantasy\",\"Action\"]'}\n",
-      "{'id': 'movie:1e0c3f95110e48d4bacefcb38817448f', 'vector_distance': '-3.736120224', 'title': 'The Empire Strikes Back', 'genres': '[\"Adventure\",\"Action\",\"Science Fiction\"]'}\n",
-      "{'id': 'movie:43b966cec4454bc4a59949f037ab5d80', 'vector_distance': '-3.55685997009', 'title': 'The Shawshank Redemption', 'genres': '[\"Drama\",\"Crime\"]'}\n",
-      "{'id': 'movie:1a42cf127f7c422e9542934c560dccf3', 'vector_distance': '-3.4610490799', 'title': 'The Lord of the Rings: The Return of the King', 'genres': '[\"Adventure\",\"Fantasy\",\"Action\"]'}\n",
-      "{'id': 'movie:720bb022230c4755ac5e039043ea63a5', 'vector_distance': '-3.3770198822', 'title': 'Return of the Jedi', 'genres': '[\"Adventure\",\"Action\",\"Science Fiction\"]'}\n",
-      "{'id': 'movie:0300dfccc16d48e69ff6b2a8f89f3307', 'vector_distance': '-3.32906913757', 'title': 'Shrek', 'genres': '[\"Adventure\",\"Animation\",\"Comedy\",\"Family\",\"Fantasy\"]'}\n",
-      "{'id': 'movie:1658113a6de2434ab52cdd0050ac87d9', 'vector_distance': '-3.3269367218', 'title': 'The Sixth Sense', 'genres': '[\"Mystery\",\"Thriller\",\"Drama\"]'}\n",
-      "{'id': 'movie:eb1d3a76e17447558ba10b331acb3f93', 'vector_distance': '-3.3075504303', 'title': 'Raiders of the Lost Ark', 'genres': '[\"Adventure\",\"Action\"]'}\n",
-      "{'id': 'movie:fca0a88760ba4dcbb852d13d1c862aa8', 'vector_distance': '-3.29939317703', 'title': 'The Silence of the Lambs', 'genres': '[\"Crime\",\"Drama\",\"Thriller\"]'}\n",
-      "{'id': 'movie:a930a67907ef449eaa5dda12af79f5af', 'vector_distance': '-3.25780773163', 'title': 'Lock, Stock and Two Smoking Barrels', 'genres': '[\"Comedy\",\"Crime\"]'}\n"
+      "{'id': 'movie:123a01ce087f4d09a833970c182f0eb2', 'vector_distance': '-2.13837456703', 'title': 'A Close Shave', 'genres': '[\"Family\",\"Animation\",\"Comedy\"]'}\n",
+      "{'id': 'movie:f6fb0a03ca0c41a4b1d63249ede39d2f', 'vector_distance': '-2.11249995232', 'title': \"Schindler's List\", 'genres': '[\"Drama\",\"History\",\"War\"]'}\n",
+      "{'id': 'movie:4d302b9754534983bf70b2304d04633e', 'vector_distance': '-2.09581518173', 'title': 'The African Queen', 'genres': '[\"Adventure\",\"War\",\"Romance\"]'}\n",
+      "{'id': 'movie:3eb10be0511641e48c41bb2de628bf6f', 'vector_distance': '-2.08978199959', 'title': 'The Lord of the Rings: The Fellowship of the Ring', 'genres': '[\"Adventure\",\"Fantasy\",\"Action\"]'}\n",
+      "{'id': 'movie:6206dd42b51048edb819adc5fbe07ba7', 'vector_distance': '-2.07609891891', 'title': 'Forrest Gump', 'genres': '[\"Comedy\",\"Drama\",\"Romance\"]'}\n",
+      "{'id': 'movie:905af238977f40f793080388d0aa1380', 'vector_distance': '-2.05023360252', 'title': 'The Wrong Trousers', 'genres': '[\"Animation\",\"Comedy\",\"Family\"]'}\n",
+      "{'id': 'movie:ad7f5971e4b64a44a318e3e48105a114', 'vector_distance': '-2.03544998169', 'title': 'Star Wars', 'genres': '[\"Adventure\",\"Action\",\"Science Fiction\"]'}\n",
+      "{'id': 'movie:ac19e02ffd4c4833abdd3ecc4702abe9', 'vector_distance': '-1.98030018806', 'title': 'Monty Python and the Holy Grail', 'genres': '[\"Adventure\",\"Comedy\",\"Fantasy\"]'}\n",
+      "{'id': 'movie:7d6b88e1d652486f96756fc2b5a7f087', 'vector_distance': '-1.98028421402', 'title': 'Mad Max 2: The Road Warrior', 'genres': '[\"Adventure\",\"Action\",\"Thriller\",\"Science Fiction\"]'}\n",
+      "{'id': 'movie:595fd594f6c8406e91d7baa6bc63efdb', 'vector_distance': '-1.96802783012', 'title': 'Fargo', 'genres': '[\"Crime\",\"Drama\",\"Thriller\"]'}\n",
+      "{'id': 'movie:04e884d71097481c8c07d8babb723f02', 'vector_distance': '-1.93948292732', 'title': 'Roger & Me', 'genres': '[\"Documentary\",\"History\"]'}\n",
+      "{'id': 'movie:015dcb2c5d30445787c5392ac551abbb', 'vector_distance': '-1.92847204208', 'title': 'The Imitation Game', 'genres': '[\"History\",\"Drama\",\"Thriller\",\"War\"]'}\n"
      ]
     }
    ],
@@ -1201,7 +1201,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 17,
+   "execution_count": 16,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -1240,7 +1240,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 18,
+   "execution_count": 17,
    "metadata": {},
    "outputs": [
     {
@@ -1275,137 +1275,137 @@
        "  <tbody>\n",
        "    <tr>\n",
        "      <th>0</th>\n",
-       "      <td>The Fugitive</td>\n",
-       "      <td>The Fugitive</td>\n",
-       "      <td>The Philadelphia Story</td>\n",
        "      <td>The Shawshank Redemption</td>\n",
-       "      <td>Shine</td>\n",
-       "      <td>What's Eating Gilbert Grape</td>\n",
+       "      <td>Good Will Hunting</td>\n",
+       "      <td>Yojimbo</td>\n",
+       "      <td>The Shawshank Redemption</td>\n",
+       "      <td>Yojimbo</td>\n",
+       "      <td>A Clockwork Orange</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>1</th>\n",
-       "      <td>Terminator 2: Judgment Day</td>\n",
-       "      <td>Terminator 2: Judgment Day</td>\n",
-       "      <td>Butch Cassidy and the Sundance Kid</td>\n",
-       "      <td>Pulp Fiction</td>\n",
-       "      <td>La Haine</td>\n",
-       "      <td>The Grapes of Wrath</td>\n",
+       "      <td>Yojimbo</td>\n",
+       "      <td>Annie Hall</td>\n",
+       "      <td>Monty Python and the Holy Grail</td>\n",
+       "      <td>The Dark Knight</td>\n",
+       "      <td>My Neighbor Totoro</td>\n",
+       "      <td>Pineapple Express</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>2</th>\n",
-       "      <td>Clear and Present Danger</td>\n",
-       "      <td>Clear and Present Danger</td>\n",
-       "      <td>Star Wars</td>\n",
-       "      <td>Blade Runner</td>\n",
-       "      <td>Castle in the Sky</td>\n",
-       "      <td>Pineapple Express</td>\n",
+       "      <td>Monty Python and the Holy Grail</td>\n",
+       "      <td>Indiana Jones and the Last Crusade</td>\n",
+       "      <td>Raising Arizona</td>\n",
+       "      <td>Pulp Fiction</td>\n",
+       "      <td>The Meaning of Life</td>\n",
+       "      <td>What's Eating Gilbert Grape</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>3</th>\n",
-       "      <td>The Silence of the Lambs</td>\n",
-       "      <td>The Silence of the Lambs</td>\n",
-       "      <td>The Bridge on the River Kwai</td>\n",
-       "      <td>The Dark Knight</td>\n",
-       "      <td>All About Eve</td>\n",
-       "      <td>A Clockwork Orange</td>\n",
+       "      <td>Big Night</td>\n",
+       "      <td>The Graduate</td>\n",
+       "      <td>To Kill a Mockingbird</td>\n",
+       "      <td>Blade Runner</td>\n",
+       "      <td>Rebel Without a Cause</td>\n",
+       "      <td>James and the Giant Peach</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>4</th>\n",
-       "      <td>Schindler's List</td>\n",
-       "      <td>Schindler's List</td>\n",
-       "      <td>The Treasure of the Sierra Madre</td>\n",
-       "      <td>Whiplash</td>\n",
-       "      <td>M</td>\n",
-       "      <td>James and the Giant Peach</td>\n",
+       "      <td>Raising Arizona</td>\n",
+       "      <td>Rear Window</td>\n",
+       "      <td>Annie Hall</td>\n",
+       "      <td>Fight Club</td>\n",
+       "      <td>The Professional</td>\n",
+       "      <td>The Grapes of Wrath</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>5</th>\n",
-       "      <td>Se7en</td>\n",
-       "      <td>Se7en</td>\n",
-       "      <td>A Christmas Story</td>\n",
-       "      <td>Fight Club</td>\n",
-       "      <td>The Contender</td>\n",
+       "      <td>Ed Wood</td>\n",
+       "      <td>Star Trek</td>\n",
+       "      <td>Indiana Jones and the Last Crusade</td>\n",
+       "      <td>The Avengers</td>\n",
+       "      <td>Sanjuro</td>\n",
        "      <td>Bananas</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>6</th>\n",
-       "      <td>The Philadelphia Story</td>\n",
-       "      <td>Speed</td>\n",
-       "      <td>The Little Mermaid</td>\n",
-       "      <td>Big Hero 6</td>\n",
-       "      <td>The Postman</td>\n",
+       "      <td>Good Will Hunting</td>\n",
+       "      <td>American Beauty</td>\n",
+       "      <td>The Graduate</td>\n",
+       "      <td>Whiplash</td>\n",
+       "      <td>All About Eve</td>\n",
        "      <td>The Apple Dumpling Gang</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>7</th>\n",
-       "      <td>A Close Shave</td>\n",
-       "      <td>Fargo</td>\n",
-       "      <td>Roger &amp; Me</td>\n",
-       "      <td>The Avengers</td>\n",
-       "      <td>Maverick</td>\n",
-       "      <td>Herbie Goes Bananas</td>\n",
+       "      <td>To Kill a Mockingbird</td>\n",
+       "      <td>Schindler's List</td>\n",
+       "      <td>Rear Window</td>\n",
+       "      <td>Big Hero 6</td>\n",
+       "      <td>All Quiet on the Western Front</td>\n",
+       "      <td>Orange County</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>8</th>\n",
-       "      <td>The Usual Suspects</td>\n",
-       "      <td>Amélie</td>\n",
-       "      <td>Dead Poets Society</td>\n",
+       "      <td>Annie Hall</td>\n",
+       "      <td>Twelve Monkeys</td>\n",
+       "      <td>The Bridge on the River Kwai</td>\n",
        "      <td>Gone Girl</td>\n",
-       "      <td>The Meaning of Life</td>\n",
-       "      <td>Orange County</td>\n",
+       "      <td>Cowboy Bebop: The Movie</td>\n",
+       "      <td>Herbie Goes Bananas</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>9</th>\n",
-       "      <td>Speed</td>\n",
-       "      <td>Jurassic Park</td>\n",
-       "      <td>Stand by Me</td>\n",
+       "      <td>Indiana Jones and the Last Crusade</td>\n",
+       "      <td>The Princess Bride</td>\n",
+       "      <td>Roger &amp; Me</td>\n",
        "      <td>Guardians of the Galaxy</td>\n",
-       "      <td>Frost/Nixon</td>\n",
-       "      <td>The Apple Dumpling Gang Rides Again</td>\n",
+       "      <td>City Lights</td>\n",
+       "      <td>Adam's Apples</td>\n",
        "    </tr>\n",
        "  </tbody>\n",
        "</table>\n",
        "</div>"
       ],
       "text/plain": [
-       "                    top picks               block busters  \\\n",
-       "0                The Fugitive                The Fugitive   \n",
-       "1  Terminator 2: Judgment Day  Terminator 2: Judgment Day   \n",
-       "2    Clear and Present Danger    Clear and Present Danger   \n",
-       "3    The Silence of the Lambs    The Silence of the Lambs   \n",
-       "4            Schindler's List            Schindler's List   \n",
-       "5                       Se7en                       Se7en   \n",
-       "6      The Philadelphia Story                       Speed   \n",
-       "7               A Close Shave                       Fargo   \n",
-       "8          The Usual Suspects                      Amélie   \n",
-       "9                       Speed               Jurassic Park   \n",
+       "                            top picks                       block busters  \\\n",
+       "0            The Shawshank Redemption                   Good Will Hunting   \n",
+       "1                             Yojimbo                          Annie Hall   \n",
+       "2     Monty Python and the Holy Grail  Indiana Jones and the Last Crusade   \n",
+       "3                           Big Night                        The Graduate   \n",
+       "4                     Raising Arizona                         Rear Window   \n",
+       "5                             Ed Wood                           Star Trek   \n",
+       "6                   Good Will Hunting                     American Beauty   \n",
+       "7               To Kill a Mockingbird                    Schindler's List   \n",
+       "8                          Annie Hall                      Twelve Monkeys   \n",
+       "9  Indiana Jones and the Last Crusade                  The Princess Bride   \n",
        "\n",
        "                             classics            what's popular  \\\n",
-       "0              The Philadelphia Story  The Shawshank Redemption   \n",
-       "1  Butch Cassidy and the Sundance Kid              Pulp Fiction   \n",
-       "2                           Star Wars              Blade Runner   \n",
-       "3        The Bridge on the River Kwai           The Dark Knight   \n",
-       "4    The Treasure of the Sierra Madre                  Whiplash   \n",
-       "5                   A Christmas Story                Fight Club   \n",
-       "6                  The Little Mermaid                Big Hero 6   \n",
-       "7                          Roger & Me              The Avengers   \n",
-       "8                  Dead Poets Society                 Gone Girl   \n",
-       "9                         Stand by Me   Guardians of the Galaxy   \n",
+       "0                             Yojimbo  The Shawshank Redemption   \n",
+       "1     Monty Python and the Holy Grail           The Dark Knight   \n",
+       "2                     Raising Arizona              Pulp Fiction   \n",
+       "3               To Kill a Mockingbird              Blade Runner   \n",
+       "4                          Annie Hall                Fight Club   \n",
+       "5  Indiana Jones and the Last Crusade              The Avengers   \n",
+       "6                        The Graduate                  Whiplash   \n",
+       "7                         Rear Window                Big Hero 6   \n",
+       "8        The Bridge on the River Kwai                 Gone Girl   \n",
+       "9                          Roger & Me   Guardians of the Galaxy   \n",
        "\n",
-       "            indie hits                         fruity films  \n",
-       "0                Shine          What's Eating Gilbert Grape  \n",
-       "1             La Haine                  The Grapes of Wrath  \n",
-       "2    Castle in the Sky                    Pineapple Express  \n",
-       "3        All About Eve                   A Clockwork Orange  \n",
-       "4                    M            James and the Giant Peach  \n",
-       "5        The Contender                              Bananas  \n",
-       "6          The Postman              The Apple Dumpling Gang  \n",
-       "7             Maverick                  Herbie Goes Bananas  \n",
-       "8  The Meaning of Life                        Orange County  \n",
-       "9          Frost/Nixon  The Apple Dumpling Gang Rides Again  "
+       "                       indie hits                 fruity films  \n",
+       "0                         Yojimbo           A Clockwork Orange  \n",
+       "1              My Neighbor Totoro            Pineapple Express  \n",
+       "2             The Meaning of Life  What's Eating Gilbert Grape  \n",
+       "3           Rebel Without a Cause    James and the Giant Peach  \n",
+       "4                The Professional          The Grapes of Wrath  \n",
+       "5                         Sanjuro                      Bananas  \n",
+       "6                   All About Eve      The Apple Dumpling Gang  \n",
+       "7  All Quiet on the Western Front                Orange County  \n",
+       "8         Cowboy Bebop: The Movie          Herbie Goes Bananas  \n",
+       "9                     City Lights                Adam's Apples  "
       ]
      },
-     "execution_count": 18,
+     "execution_count": 17,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -1435,7 +1435,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 19,
+   "execution_count": 18,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -1444,6 +1444,7 @@
     "    user_vector = user_vectors[user_id].tolist()\n",
     "    watched_movies = ratings_df[ratings_df['userId'] == user_id]['movieId'].tolist()\n",
     "\n",
+    "    # add the movies the user has already watched to the bloom filter so they get filtered out below\n",
     "    client.bf().insert('user_watched_list', [f\"{user_id}:{movie_id}\" for movie_id in watched_movies])\n",
     "\n",
     "    query = RangeQuery(vector=user_vector,\n",
@@ -1451,19 +1452,18 @@
     "                       num_results=num_results * 5,  # fetch more results to filter out watched movies\n",
     "                       filter_expression=filters,\n",
     "                       return_fields=['title', 'overview', 'genres', 'movieId'],\n",
-    "    ) # TODO figure out why i need to add '$.' to some fields, but not others\n",
+    "    )\n",
     "    results = movie_index.query(query)\n",
     "\n",
-    "    # filter out movies that the user has already watched\n",
-    "    recommendations = []\n",
-    "    for r in results:\n",
-    "        if not client.bf().exists('user_watched_list', f\"{user_id}:{r['movieId']}\"):\n",
-    "            recommendations.append((r['title'], r['overview'], r['genres'], r['vector_distance']))\n",
-    "        if len(recommendations) >= num_results:\n",
-    "            break\n",
-    "\n",
-    "    # add these films to the bloom filter\n",
-    "    client.bf().insert('user_watched_list', [f\"{user_id}:{r['movieId']}\" for r  in results])\n",
+    "    matches = client.bf().mexists(\"user_watched_list\", *[f\"{user_id}:{r['movieId']}\" for r in results])\n",
+    "\n",
+    "    recommendations = [\n",
+    "        (r['title'], r['overview'], r['genres'], r['vector_distance'], r['movieId'])\n",
+    "        for i, r in enumerate(results) if matches[i] == 0\n",
+    "    ][:num_results]\n",
+    "\n",
+    "    # add these recommendations to the bloom filter so they don't appear again\n",
+    "    client.bf().insert('user_watched_list', [f\"{user_id}:{r[4]}\" for r  in recommendations])\n",
     "    return recommendations\n",
     "\n",
     "# example usage\n",
@@ -1471,7 +1471,8 @@
     "try:\n",
     "    client.bf().create(f\"user_watched_list\", 0.01, 10000)\n",
     "except Exception as e:\n",
-    "    print(e)\n",
+    "    client.delete(\"user_watched_list\")\n",
+    "    client.bf().create(f\"user_watched_list\", 0.01, 10000)\n",
     "\n",
     "user_id = 42\n",
     "\n",
@@ -1484,7 +1485,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 20,
+   "execution_count": 19,
    "metadata": {
     "vscode": {
      "languageId": "ruby"
@@ -1522,65 +1523,72 @@
        "  <tbody>\n",
        "    <tr>\n",
        "      <th>0</th>\n",
-       "      <td>The Silence of the Lambs</td>\n",
-       "      <td>Star Trek: Generations</td>\n",
-       "      <td>The Bridge on the River Kwai</td>\n",
+       "      <td>Yojimbo</td>\n",
+       "      <td>Annie Hall</td>\n",
+       "      <td>To Kill a Mockingbird</td>\n",
        "      <td>Blade Runner</td>\n",
-       "      <td>Shine</td>\n",
+       "      <td>My Neighbor Totoro</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>1</th>\n",
-       "      <td>Se7en</td>\n",
-       "      <td>Jumanji</td>\n",
-       "      <td>The Treasure of the Sierra Madre</td>\n",
-       "      <td>Whiplash</td>\n",
-       "      <td>La Haine</td>\n",
+       "      <td>Monty Python and the Holy Grail</td>\n",
+       "      <td>The Graduate</td>\n",
+       "      <td>The Bridge on the River Kwai</td>\n",
+       "      <td>Fight Club</td>\n",
+       "      <td>The Meaning of Life</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>2</th>\n",
-       "      <td>The Philadelphia Story</td>\n",
-       "      <td>Outbreak</td>\n",
-       "      <td>A Christmas Story</td>\n",
-       "      <td>Fight Club</td>\n",
-       "      <td>Castle in the Sky</td>\n",
+       "      <td>Big Night</td>\n",
+       "      <td>Rear Window</td>\n",
+       "      <td>Roger &amp; Me</td>\n",
+       "      <td>Whiplash</td>\n",
+       "      <td>Rebel Without a Cause</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>3</th>\n",
-       "      <td>A Close Shave</td>\n",
-       "      <td>The Lion King</td>\n",
-       "      <td>The Little Mermaid</td>\n",
+       "      <td>Raising Arizona</td>\n",
+       "      <td>American Beauty</td>\n",
+       "      <td>Cinema Paradiso</td>\n",
        "      <td>Big Hero 6</td>\n",
-       "      <td>All About Eve</td>\n",
+       "      <td>The Professional</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>4</th>\n",
-       "      <td>The Usual Suspects</td>\n",
-       "      <td>Men in Black</td>\n",
-       "      <td>Roger &amp; Me</td>\n",
+       "      <td>Ed Wood</td>\n",
+       "      <td>Twelve Monkeys</td>\n",
+       "      <td>Dr. Strangelove or: How I Learned to Stop Worr...</td>\n",
        "      <td>Gone Girl</td>\n",
-       "      <td>M</td>\n",
+       "      <td>Sanjuro</td>\n",
        "    </tr>\n",
        "  </tbody>\n",
        "</table>\n",
        "</div>"
       ],
       "text/plain": [
-       "                  top picks           block busters  \\\n",
-       "0  The Silence of the Lambs  Star Trek: Generations   \n",
-       "1                     Se7en                 Jumanji   \n",
-       "2    The Philadelphia Story                Outbreak   \n",
-       "3             A Close Shave           The Lion King   \n",
-       "4        The Usual Suspects            Men in Black   \n",
+       "                         top picks    block busters  \\\n",
+       "0                          Yojimbo       Annie Hall   \n",
+       "1  Monty Python and the Holy Grail     The Graduate   \n",
+       "2                        Big Night      Rear Window   \n",
+       "3                  Raising Arizona  American Beauty   \n",
+       "4                          Ed Wood   Twelve Monkeys   \n",
+       "\n",
+       "                                            classics what's popular  \\\n",
+       "0                              To Kill a Mockingbird   Blade Runner   \n",
+       "1                       The Bridge on the River Kwai     Fight Club   \n",
+       "2                                         Roger & Me       Whiplash   \n",
+       "3                                    Cinema Paradiso     Big Hero 6   \n",
+       "4  Dr. Strangelove or: How I Learned to Stop Worr...      Gone Girl   \n",
        "\n",
-       "                           classics what's popular         indie hits  \n",
-       "0      The Bridge on the River Kwai   Blade Runner              Shine  \n",
-       "1  The Treasure of the Sierra Madre       Whiplash           La Haine  \n",
-       "2                 A Christmas Story     Fight Club  Castle in the Sky  \n",
-       "3                The Little Mermaid     Big Hero 6      All About Eve  \n",
-       "4                        Roger & Me      Gone Girl                  M  "
+       "              indie hits  \n",
+       "0     My Neighbor Totoro  \n",
+       "1    The Meaning of Life  \n",
+       "2  Rebel Without a Cause  \n",
+       "3       The Professional  \n",
+       "4                Sanjuro  "
       ]
      },
-     "execution_count": 20,
+     "execution_count": 19,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -1607,14 +1615,14 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 21,
+   "execution_count": 20,
    "metadata": {},
    "outputs": [
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Deleted 4334 keys\n",
+      "Deleted 4337 keys\n",
       "Deleted 2000 keys\n",
       "Deleted 1000 keys\n",
       "Deleted 500 keys\n",
@@ -1627,7 +1635,7 @@
        "1"
       ]
      },
-     "execution_count": 21,
+     "execution_count": 20,
      "metadata": {},
      "output_type": "execute_result"
     }

From 2edd5b248a5946f653d2622ef1038a4a0ae985dc Mon Sep 17 00:00:00 2001
From: Justin Cechmanek <justin.cechmanek@redis.com>
Date: Mon, 14 Oct 2024 16:59:28 -0700
Subject: [PATCH 08/12] adds surprise to requirements.txt

---
 requirements.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/requirements.txt b/requirements.txt
index 216712e5..08d8a236 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -20,4 +20,4 @@ redisvl>=0.3.0
 pytest
 ragas
 datasets
-
+scikit-surprise

From e497231493a283f513a7f7c1b75a30b288fcfd52 Mon Sep 17 00:00:00 2001
From: Justin Cechmanek <justin.cechmanek@redis.com>
Date: Tue, 15 Oct 2024 11:35:08 -0700
Subject: [PATCH 09/12] fixes typo in schema

---
 .../recommendation-systems/collaborative_filtering_schema.yaml  | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/python-recipes/recommendation-systems/collaborative_filtering_schema.yaml b/python-recipes/recommendation-systems/collaborative_filtering_schema.yaml
index 0a6f61b4..af58d793 100644
--- a/python-recipes/recommendation-systems/collaborative_filtering_schema.yaml
+++ b/python-recipes/recommendation-systems/collaborative_filtering_schema.yaml
@@ -37,4 +37,4 @@ fields:
           dims: 100
           distance_metric: ip
           algorithm: flat
-          dtype: float32
\ No newline at end of file
+          datatype: float32
\ No newline at end of file

From 267947093f651d8c096788e937da358d6d6d4dfa Mon Sep 17 00:00:00 2001
From: Justin Cechmanek <justin.cechmanek@redis.com>
Date: Wed, 16 Oct 2024 11:12:23 -0700
Subject: [PATCH 10/12] adds reference to README, fixes typos

---
 README.md                                            |  1 +
 .../collaborative_filtering.ipynb                    | 12 ++++++------
 2 files changed, 7 insertions(+), 6 deletions(-)

diff --git a/README.md b/README.md
index deac85df..0b576ce7 100644
--- a/README.md
+++ b/README.md
@@ -99,6 +99,7 @@ For further insights on enhancing RAG applications with dense content representa
 | Recipe | Description |
 | --- | --- |
 | [/recommendation-systems/content_filtering.ipynb](python-recipes/recommendation-systems/content_filtering.ipynb) | Intro content filtering example with redisvl |
+| [/recommendation-systems/collaborative_filtering.ipynb](python-recipes/recommendation-systems/collaborative_filtering.ipynb) | Intro collaborative filtering example with redisvl |
 
 ### See also
 An exciting example of how Redis can power production-ready systems is highlighted in our collaboration with [NVIDIA](https://developer.nvidia.com/blog/offline-to-online-feature-storage-for-real-time-recommendation-systems-with-nvidia-merlin/) to construct a state-of-the-art recommendation system.
diff --git a/python-recipes/recommendation-systems/collaborative_filtering.ipynb b/python-recipes/recommendation-systems/collaborative_filtering.ipynb
index 09344699..45fae026 100644
--- a/python-recipes/recommendation-systems/collaborative_filtering.ipynb
+++ b/python-recipes/recommendation-systems/collaborative_filtering.ipynb
@@ -17,7 +17,7 @@
    "source": [
     "Recommendation systems are a common application of machine learning and serve many industries from e-commerce to music streaming platforms.\n",
     "\n",
-    "There are many different architechtures that can be followed to build a recommendation system.\n",
+    "There are many different architectures that can be followed to build a recommendation system. In a previous example notebook we demonstrated how to do [content filtering with RedisVL](content_filtering.ipynb). We encourage you to start there before diving into this notebook.\n",
     "\n",
     "In this notebook we'll demonstrate how to build a [collaborative filtering](https://en.wikipedia.org/wiki/Collaborative_filtering)\n",
     "recommendation system and use the large IMDB movies dataset as our example data.\n",
@@ -268,7 +268,7 @@
     }
    ],
    "source": [
-    "# surprise casts userId and movieId to inner ids, so we have to use their mapping to now which rows to use\n",
+    "# surprise casts userId and movieId to inner ids, so we have to use their mapping to know which rows to use\n",
     "inner_uid = train_set.to_inner_uid(347) # userId\n",
     "inner_iid = train_set.to_inner_iid(5515) # movieId\n",
     "\n",
@@ -582,8 +582,8 @@
     "movies_df['overview'] = movies_df['overview'].fillna('')\n",
     "movies_df['popularity'] = movies_df['popularity'].fillna(0)\n",
     "movies_df['release_date'] = movies_df['release_date'].fillna('1900-01-01').apply(lambda x: datetime.datetime.strptime(x, \"%Y-%m-%d\").timestamp())\n",
-    "movies_df['revenue'] = movies_df['revenue'].fillna(0) # fill with average?\n",
-    "movies_df['runtime'] = movies_df['runtime'].fillna(0) # fill with average?\n",
+    "movies_df['revenue'] = movies_df['revenue'].fillna(0)\n",
+    "movies_df['runtime'] = movies_df['runtime'].fillna(0)\n",
     "movies_df['status'] = movies_df['status'].fillna('unknown')\n",
     "movies_df['tagline'] = movies_df['tagline'].fillna('')\n",
     "movies_df['title'] = movies_df['title'].fillna('')\n",
@@ -1196,7 +1196,7 @@
     "## Adding All the Bells & Whistles\n",
     "Vector search handles the bulk of our collaborative filtering recommendation system and is a great approach to generating personalized recommendations that are unique to each user.\n",
     "\n",
-    "To up our RecSys game even further we can leverage RedisVl filter logic to give more control to what users are shown. Why have only one feed of recommended movies when you can have several, each with its own theme and personalized to each user."
+    "To up our RecSys game even further we can leverage RedisVL Filter logic to give more control over what users are shown. Why have only one feed of recommended movies when you can have several, each with its own theme and personalized to each user?"
    ]
   },
   {
@@ -1428,7 +1428,7 @@
    "metadata": {},
    "source": [
     "## Keeping Things Fresh\n",
-    "You've probably noticed that a few movies get repeated in these lists. That's not surprising as all our results are personalized and things like `popularity` and `user_rating` and `revenue` are likely highly correlated. And it's more that likely that at least some of the recommendations we're expecting to be highly rated by a given user are ones they've already watched and rated highly.\n",
+    "You've probably noticed that a few movies get repeated in these lists. That's not surprising as all our results are personalized and things like `popularity` and `user_rating` and `revenue` are likely highly correlated. And it's more than likely that at least some of the recommendations we're expecting to be highly rated by a given user are ones they've already watched and rated highly.\n",
     "\n",
     "Luckily Redis offers an easy answer to keeping recommendations new and interesting, and that answer is Bloom Filters."
    ]

From 39704ae09d73439192848579f3924c086fbc6a42 Mon Sep 17 00:00:00 2001
From: Justin Cechmanek <justin.cechmanek@redis.com>
Date: Wed, 16 Oct 2024 15:15:37 -0700
Subject: [PATCH 11/12] stores user vector and watched list in Redis json

---
 .../collaborative_filtering.ipynb             | 403 ++++++++++--------
 1 file changed, 216 insertions(+), 187 deletions(-)

diff --git a/python-recipes/recommendation-systems/collaborative_filtering.ipynb b/python-recipes/recommendation-systems/collaborative_filtering.ipynb
index 45fae026..0a00bbd1 100644
--- a/python-recipes/recommendation-systems/collaborative_filtering.ipynb
+++ b/python-recipes/recommendation-systems/collaborative_filtering.ipynb
@@ -180,7 +180,7 @@
     {
      "data": {
       "text/plain": [
-       "<surprise.prediction_algorithms.matrix_factorization.SVD at 0x1052209d0>"
+       "<surprise.prediction_algorithms.matrix_factorization.SVD at 0x106510e10>"
       ]
      },
      "execution_count": 5,
@@ -230,7 +230,7 @@
      "output_type": "stream",
      "text": [
       "we have 671 users with feature vectors of size 100\n",
-      "we have 8377 movies with feature vectors of size 100\n"
+      "we have 8415 movies with feature vectors of size 100\n"
      ]
     }
    ],
@@ -263,7 +263,7 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "the predicted rating of user 347 on movie 5515 is 1.3640325071309123\n"
+      "the predicted rating of user 347 on movie 5515 is 1.5939846458534452\n"
      ]
     }
    ],
@@ -690,7 +690,7 @@
        "      <td>1</td>\n",
        "      <td>114709</td>\n",
        "      <td>862.0</td>\n",
-       "      <td>[0.03713469204683083, 0.10796564373254629, 0.2...</td>\n",
+       "      <td>[-0.12329348744399116, -0.03395287506133206, 0...</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>1</th>\n",
@@ -712,7 +712,7 @@
        "      <td>2</td>\n",
        "      <td>113497</td>\n",
        "      <td>8844.0</td>\n",
-       "      <td>[-0.010117012753361906, -0.03687474969254127, ...</td>\n",
+       "      <td>[-0.20839075686685218, 0.2842778495633789, 0.2...</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>2</th>\n",
@@ -734,7 +734,7 @@
        "      <td>3</td>\n",
        "      <td>113228</td>\n",
        "      <td>15602.0</td>\n",
-       "      <td>[0.13139654322372601, 0.14560140137289648, 0.1...</td>\n",
+       "      <td>[-0.3250115780939791, 0.11093873287053337, 0.4...</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>3</th>\n",
@@ -756,7 +756,7 @@
        "      <td>4</td>\n",
        "      <td>114885</td>\n",
        "      <td>31357.0</td>\n",
-       "      <td>[0.1564855291020289, -0.01096475924961168, 0.2...</td>\n",
+       "      <td>[-0.08088437767077983, 0.1911468768682881, 0.2...</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>4</th>\n",
@@ -778,7 +778,7 @@
        "      <td>5</td>\n",
        "      <td>113041</td>\n",
        "      <td>11862.0</td>\n",
-       "      <td>[0.07205704581865023, 0.25224445082871455, 0.0...</td>\n",
+       "      <td>[-0.007213409719480573, 0.20232376643634847, 0...</td>\n",
        "    </tr>\n",
        "  </tbody>\n",
        "</table>\n",
@@ -821,11 +821,11 @@
        "4  Father of the Bride Part II           5.7         173       5  113041   \n",
        "\n",
        "    tmdbId                                       movie_vector  \n",
-       "0    862.0  [0.03713469204683083, 0.10796564373254629, 0.2...  \n",
-       "1   8844.0  [-0.010117012753361906, -0.03687474969254127, ...  \n",
-       "2  15602.0  [0.13139654322372601, 0.14560140137289648, 0.1...  \n",
-       "3  31357.0  [0.1564855291020289, -0.01096475924961168, 0.2...  \n",
-       "4  11862.0  [0.07205704581865023, 0.25224445082871455, 0.0...  "
+       "0    862.0  [-0.12329348744399116, -0.03395287506133206, 0...  \n",
+       "1   8844.0  [-0.20839075686685218, 0.2842778495633789, 0.2...  \n",
+       "2  15602.0  [-0.3250115780939791, 0.11093873287053337, 0.4...  \n",
+       "3  31357.0  [-0.08088437767077983, 0.1911468768682881, 0.2...  \n",
+       "4  11862.0  [-0.007213409719480573, 0.20232376643634847, 0...  "
       ]
      },
      "execution_count": 11,
@@ -867,7 +867,7 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "15:33:21 redisvl.index.index INFO   Index already exists, overwriting.\n"
+      "15:07:36 redisvl.index.index INFO   Index already exists, overwriting.\n"
      ]
     }
    ],
@@ -881,31 +881,24 @@
     "movie_schema = IndexSchema.from_yaml(\"collaborative_filtering_schema.yaml\")\n",
     "\n",
     "movie_index = SearchIndex(movie_schema, redis_client=client)\n",
-    "movie_index.create(overwrite=True, drop=True)"
+    "movie_index.create(overwrite=True, drop=True)\n",
+    "\n",
+    "movie_keys = movie_index.load(movies_df.to_dict(orient='records'))"
    ]
   },
   {
    "cell_type": "code",
    "execution_count": 13,
    "metadata": {},
-   "outputs": [],
-   "source": [
-    "keys = movie_index.load(movies_df.to_dict(orient='records'))"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 14,
-   "metadata": {},
    "outputs": [
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "number of movies 8337\n",
-      "size of movie df 8337\n",
-      "unique movie ids 8331\n",
-      "unique movie titles 8100\n",
+      "number of movies 8370\n",
+      "size of movie df 8370\n",
+      "unique movie ids 8364\n",
+      "unique movie titles 8125\n",
       "unique movies rated 9065\n"
      ]
     },
@@ -972,7 +965,7 @@
        "      <td>1</td>\n",
        "      <td>114709</td>\n",
        "      <td>862.0</td>\n",
-       "      <td>[0.03713469204683083, 0.10796564373254629, 0.2...</td>\n",
+       "      <td>[-0.12329348744399116, -0.03395287506133206, 0...</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>1</th>\n",
@@ -994,7 +987,7 @@
        "      <td>2</td>\n",
        "      <td>113497</td>\n",
        "      <td>8844.0</td>\n",
-       "      <td>[-0.010117012753361906, -0.03687474969254127, ...</td>\n",
+       "      <td>[-0.20839075686685218, 0.2842778495633789, 0.2...</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>2</th>\n",
@@ -1016,7 +1009,7 @@
        "      <td>3</td>\n",
        "      <td>113228</td>\n",
        "      <td>15602.0</td>\n",
-       "      <td>[0.13139654322372601, 0.14560140137289648, 0.1...</td>\n",
+       "      <td>[-0.3250115780939791, 0.11093873287053337, 0.4...</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>3</th>\n",
@@ -1038,7 +1031,7 @@
        "      <td>4</td>\n",
        "      <td>114885</td>\n",
        "      <td>31357.0</td>\n",
-       "      <td>[0.1564855291020289, -0.01096475924961168, 0.2...</td>\n",
+       "      <td>[-0.08088437767077983, 0.1911468768682881, 0.2...</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>4</th>\n",
@@ -1060,7 +1053,7 @@
        "      <td>5</td>\n",
        "      <td>113041</td>\n",
        "      <td>11862.0</td>\n",
-       "      <td>[0.07205704581865023, 0.25224445082871455, 0.0...</td>\n",
+       "      <td>[-0.007213409719480573, 0.20232376643634847, 0...</td>\n",
        "    </tr>\n",
        "  </tbody>\n",
        "</table>\n",
@@ -1103,14 +1096,14 @@
        "4  Father of the Bride Part II           5.7         173       5  113041   \n",
        "\n",
        "    tmdbId                                       movie_vector  \n",
-       "0    862.0  [0.03713469204683083, 0.10796564373254629, 0.2...  \n",
-       "1   8844.0  [-0.010117012753361906, -0.03687474969254127, ...  \n",
-       "2  15602.0  [0.13139654322372601, 0.14560140137289648, 0.1...  \n",
-       "3  31357.0  [0.1564855291020289, -0.01096475924961168, 0.2...  \n",
-       "4  11862.0  [0.07205704581865023, 0.25224445082871455, 0.0...  "
+       "0    862.0  [-0.12329348744399116, -0.03395287506133206, 0...  \n",
+       "1   8844.0  [-0.20839075686685218, 0.2842778495633789, 0.2...  \n",
+       "2  15602.0  [-0.3250115780939791, 0.11093873287053337, 0.4...  \n",
+       "3  31357.0  [-0.08088437767077983, 0.1911468768682881, 0.2...  \n",
+       "4  11862.0  [-0.007213409719480573, 0.20232376643634847, 0...  "
       ]
      },
-     "execution_count": 14,
+     "execution_count": 13,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -1134,6 +1127,35 @@
     "movies_df.head()"
    ]
   },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "For a complete solution we'll also store the user vectors and their watched lists in Redis. We won't be searching over these user vectors, so there is no need to define an index for them; a direct JSON lookup will suffice."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 14,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from redis.commands.json.path import Path\n",
+    "\n",
+    "# queue every user's JSON.SET on one Redis pipeline, then send them all together with a single execute()\n",
+    "with client.pipeline() as pipe:\n",
+    "    for user_id, user_vector in user_vectors_and_ids.items():\n",
+    "        user_key = f\"user:{user_id}\"\n",
+    "        watched_list_ids = ratings_df[ratings_df['userId'] == user_id]['movieId'].tolist()\n",
+    "\n",
+    "        user_data = {\n",
+    "            \"user_vector\": user_vector,\n",
+    "            \"watched_list_ids\": watched_list_ids\n",
+    "        }\n",
+    "        pipe.json().set(user_key, Path.root_path(), user_data)\n",
+    "    pipe.execute()"
+   ]
+  },
   {
    "cell_type": "markdown",
    "metadata": {},
@@ -1154,25 +1176,25 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "{'id': 'movie:123a01ce087f4d09a833970c182f0eb2', 'vector_distance': '-2.13837456703', 'title': 'A Close Shave', 'genres': '[\"Family\",\"Animation\",\"Comedy\"]'}\n",
-      "{'id': 'movie:f6fb0a03ca0c41a4b1d63249ede39d2f', 'vector_distance': '-2.11249995232', 'title': \"Schindler's List\", 'genres': '[\"Drama\",\"History\",\"War\"]'}\n",
-      "{'id': 'movie:4d302b9754534983bf70b2304d04633e', 'vector_distance': '-2.09581518173', 'title': 'The African Queen', 'genres': '[\"Adventure\",\"War\",\"Romance\"]'}\n",
-      "{'id': 'movie:3eb10be0511641e48c41bb2de628bf6f', 'vector_distance': '-2.08978199959', 'title': 'The Lord of the Rings: The Fellowship of the Ring', 'genres': '[\"Adventure\",\"Fantasy\",\"Action\"]'}\n",
-      "{'id': 'movie:6206dd42b51048edb819adc5fbe07ba7', 'vector_distance': '-2.07609891891', 'title': 'Forrest Gump', 'genres': '[\"Comedy\",\"Drama\",\"Romance\"]'}\n",
-      "{'id': 'movie:905af238977f40f793080388d0aa1380', 'vector_distance': '-2.05023360252', 'title': 'The Wrong Trousers', 'genres': '[\"Animation\",\"Comedy\",\"Family\"]'}\n",
-      "{'id': 'movie:ad7f5971e4b64a44a318e3e48105a114', 'vector_distance': '-2.03544998169', 'title': 'Star Wars', 'genres': '[\"Adventure\",\"Action\",\"Science Fiction\"]'}\n",
-      "{'id': 'movie:ac19e02ffd4c4833abdd3ecc4702abe9', 'vector_distance': '-1.98030018806', 'title': 'Monty Python and the Holy Grail', 'genres': '[\"Adventure\",\"Comedy\",\"Fantasy\"]'}\n",
-      "{'id': 'movie:7d6b88e1d652486f96756fc2b5a7f087', 'vector_distance': '-1.98028421402', 'title': 'Mad Max 2: The Road Warrior', 'genres': '[\"Adventure\",\"Action\",\"Thriller\",\"Science Fiction\"]'}\n",
-      "{'id': 'movie:595fd594f6c8406e91d7baa6bc63efdb', 'vector_distance': '-1.96802783012', 'title': 'Fargo', 'genres': '[\"Crime\",\"Drama\",\"Thriller\"]'}\n",
-      "{'id': 'movie:04e884d71097481c8c07d8babb723f02', 'vector_distance': '-1.93948292732', 'title': 'Roger & Me', 'genres': '[\"Documentary\",\"History\"]'}\n",
-      "{'id': 'movie:015dcb2c5d30445787c5392ac551abbb', 'vector_distance': '-1.92847204208', 'title': 'The Imitation Game', 'genres': '[\"History\",\"Drama\",\"Thriller\",\"War\"]'}\n"
+      "{'id': 'movie:255865ce253c4b7bbefaff7884035b0c', 'vector_distance': '-3.8687338829', 'title': 'Spirited Away', 'genres': '[\"Fantasy\",\"Adventure\",\"Animation\",\"Family\"]'}\n",
+      "{'id': 'movie:c833029c842143fdaf7bb5acedb051ce', 'vector_distance': '-3.73652648926', 'title': 'The Princess Bride', 'genres': '[\"Adventure\",\"Family\",\"Fantasy\",\"Comedy\",\"Romance\"]'}\n",
+      "{'id': 'movie:cf48a5443467433ca57c1741104cf123', 'vector_distance': '-3.66395378113', 'title': 'The Usual Suspects', 'genres': '[\"Drama\",\"Crime\",\"Thriller\"]'}\n",
+      "{'id': 'movie:a8707fc2440043a78e1b7ee92c5038cf', 'vector_distance': '-3.62124490738', 'title': 'The Shawshank Redemption', 'genres': '[\"Drama\",\"Crime\"]'}\n",
+      "{'id': 'movie:772b299da4e8427082e13fc542c80a9e', 'vector_distance': '-3.59598970413', 'title': 'A Beautiful Mind', 'genres': '[\"Drama\",\"Romance\"]'}\n",
+      "{'id': 'movie:9818b2cc529f4ef8af6b1e42618c7e19', 'vector_distance': '-3.57971763611', 'title': 'Cinema Paradiso', 'genres': '[\"Drama\",\"Romance\"]'}\n",
+      "{'id': 'movie:cd43f8fee7024fc0a3edd2cb155491cf', 'vector_distance': '-3.54007005692', 'title': 'The Empire Strikes Back', 'genres': '[\"Adventure\",\"Action\",\"Science Fiction\"]'}\n",
+      "{'id': 'movie:ca1c554ca7ef4da29e1a543147da54d9', 'vector_distance': '-3.53854608536', 'title': 'Like Water for Chocolate', 'genres': '[\"Drama\",\"Romance\"]'}\n",
+      "{'id': 'movie:f03df438a38349a4992222b6d37e81eb', 'vector_distance': '-3.4644536972', 'title': 'Roger & Me', 'genres': '[\"Documentary\",\"History\"]'}\n",
+      "{'id': 'movie:e1b6f8ad41d2425a8b470f0d206038bf', 'vector_distance': '-3.45273590088', 'title': 'The Lord of the Rings: The Fellowship of the Ring', 'genres': '[\"Adventure\",\"Fantasy\",\"Action\"]'}\n",
+      "{'id': 'movie:1d2091b80efd4052b2c54390f8f25172', 'vector_distance': '-3.44259595871', 'title': 'Star Wars', 'genres': '[\"Adventure\",\"Action\",\"Science Fiction\"]'}\n",
+      "{'id': 'movie:b41bb158cd0b4362955e3800fd2cfb9d', 'vector_distance': '-3.40954303741', 'title': 'The Lord of the Rings: The Two Towers', 'genres': '[\"Adventure\",\"Fantasy\",\"Action\"]'}\n"
      ]
     }
    ],
    "source": [
     "from redisvl.query import RangeQuery\n",
     "\n",
-    "user_vector = user_vectors[352].tolist()\n",
+    "user_vector = client.json().get(f\"user:{352}\")[\"user_vector\"]\n",
     "\n",
     "# the distance metric 'ip' inner product is computing \"score = 1 - u * v\" and returning the minimum, which corresponds to the max of \"u * v\"\n",
     "# this is what we want. The predicted rating on a scale of 0 to 5 is then -(score - 1) == -score + 1\n",
@@ -1209,7 +1231,7 @@
     "from redisvl.query.filter import Tag, Num, Text\n",
     "\n",
     "def get_recommendations(user_id, filters=None, num_results=10):\n",
-    "    user_vector = user_vectors[user_id].tolist()\n",
+    "    user_vector = client.json().get(f\"user:{user_id}\")[\"user_vector\"]\n",
     "    query = RangeQuery(vector=user_vector,\n",
     "                       vector_field_name='movie_vector',\n",
     "                       num_results=num_results,\n",
@@ -1276,133 +1298,145 @@
        "    <tr>\n",
        "      <th>0</th>\n",
        "      <td>The Shawshank Redemption</td>\n",
-       "      <td>Good Will Hunting</td>\n",
-       "      <td>Yojimbo</td>\n",
+       "      <td>The Godfather</td>\n",
+       "      <td>Cinema Paradiso</td>\n",
        "      <td>The Shawshank Redemption</td>\n",
        "      <td>Yojimbo</td>\n",
-       "      <td>A Clockwork Orange</td>\n",
+       "      <td>What's Eating Gilbert Grape</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>1</th>\n",
-       "      <td>Yojimbo</td>\n",
-       "      <td>Annie Hall</td>\n",
-       "      <td>Monty Python and the Holy Grail</td>\n",
+       "      <td>Cinema Paradiso</td>\n",
+       "      <td>The Lord of the Rings: The Fellowship of the Ring</td>\n",
+       "      <td>The Godfather</td>\n",
        "      <td>The Dark Knight</td>\n",
-       "      <td>My Neighbor Totoro</td>\n",
-       "      <td>Pineapple Express</td>\n",
+       "      <td>La Haine</td>\n",
+       "      <td>The Grapes of Wrath</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>2</th>\n",
-       "      <td>Monty Python and the Holy Grail</td>\n",
-       "      <td>Indiana Jones and the Last Crusade</td>\n",
-       "      <td>Raising Arizona</td>\n",
+       "      <td>Band of Brothers</td>\n",
+       "      <td>Schindler's List</td>\n",
+       "      <td>The Empire Strikes Back</td>\n",
        "      <td>Pulp Fiction</td>\n",
-       "      <td>The Meaning of Life</td>\n",
-       "      <td>What's Eating Gilbert Grape</td>\n",
+       "      <td>The Postman</td>\n",
+       "      <td>A Clockwork Orange</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>3</th>\n",
-       "      <td>Big Night</td>\n",
-       "      <td>The Graduate</td>\n",
-       "      <td>To Kill a Mockingbird</td>\n",
-       "      <td>Blade Runner</td>\n",
-       "      <td>Rebel Without a Cause</td>\n",
-       "      <td>James and the Giant Peach</td>\n",
+       "      <td>A Grand Day Out</td>\n",
+       "      <td>The Empire Strikes Back</td>\n",
+       "      <td>Star Wars</td>\n",
+       "      <td>Whiplash</td>\n",
+       "      <td>Seven Samurai</td>\n",
+       "      <td>Bananas</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>4</th>\n",
-       "      <td>Raising Arizona</td>\n",
-       "      <td>Rear Window</td>\n",
-       "      <td>Annie Hall</td>\n",
-       "      <td>Fight Club</td>\n",
-       "      <td>The Professional</td>\n",
-       "      <td>The Grapes of Wrath</td>\n",
+       "      <td>The Godfather</td>\n",
+       "      <td>The Lord of the Rings: The Two Towers</td>\n",
+       "      <td>The Philadelphia Story</td>\n",
+       "      <td>Blade Runner</td>\n",
+       "      <td>Shine</td>\n",
+       "      <td>Pineapple Express</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>5</th>\n",
-       "      <td>Ed Wood</td>\n",
-       "      <td>Star Trek</td>\n",
-       "      <td>Indiana Jones and the Last Crusade</td>\n",
-       "      <td>The Avengers</td>\n",
-       "      <td>Sanjuro</td>\n",
-       "      <td>Bananas</td>\n",
+       "      <td>The Lord of the Rings: The Fellowship of the Ring</td>\n",
+       "      <td>Star Wars</td>\n",
+       "      <td>Mr. Smith Goes to Washington</td>\n",
+       "      <td>Big Hero 6</td>\n",
+       "      <td>My Neighbor Totoro</td>\n",
+       "      <td>James and the Giant Peach</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>6</th>\n",
-       "      <td>Good Will Hunting</td>\n",
-       "      <td>American Beauty</td>\n",
-       "      <td>The Graduate</td>\n",
-       "      <td>Whiplash</td>\n",
-       "      <td>All About Eve</td>\n",
+       "      <td>Schindler's List</td>\n",
+       "      <td>The Fugitive</td>\n",
+       "      <td>Empire of the Sun</td>\n",
+       "      <td>Fight Club</td>\n",
+       "      <td>The Professional</td>\n",
        "      <td>The Apple Dumpling Gang</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>7</th>\n",
-       "      <td>To Kill a Mockingbird</td>\n",
-       "      <td>Schindler's List</td>\n",
-       "      <td>Rear Window</td>\n",
-       "      <td>Big Hero 6</td>\n",
-       "      <td>All Quiet on the Western Front</td>\n",
+       "      <td>The Empire Strikes Back</td>\n",
+       "      <td>The Matrix</td>\n",
+       "      <td>Stand by Me</td>\n",
+       "      <td>The Avengers</td>\n",
+       "      <td>All About Eve</td>\n",
        "      <td>Orange County</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>8</th>\n",
-       "      <td>Annie Hall</td>\n",
-       "      <td>Twelve Monkeys</td>\n",
-       "      <td>The Bridge on the River Kwai</td>\n",
-       "      <td>Gone Girl</td>\n",
-       "      <td>Cowboy Bebop: The Movie</td>\n",
-       "      <td>Herbie Goes Bananas</td>\n",
+       "      <td>The Lord of the Rings: The Two Towers</td>\n",
+       "      <td>The Dark Knight</td>\n",
+       "      <td>The Princess Bride</td>\n",
+       "      <td>Guardians of the Galaxy</td>\n",
+       "      <td>Rebel Without a Cause</td>\n",
+       "      <td>The Apple Dumpling Gang Rides Again</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>9</th>\n",
-       "      <td>Indiana Jones and the Last Crusade</td>\n",
-       "      <td>The Princess Bride</td>\n",
-       "      <td>Roger &amp; Me</td>\n",
-       "      <td>Guardians of the Galaxy</td>\n",
-       "      <td>City Lights</td>\n",
-       "      <td>Adam's Apples</td>\n",
+       "      <td>The Usual Suspects</td>\n",
+       "      <td>Pulp Fiction</td>\n",
+       "      <td>Raiders of the Lost Ark</td>\n",
+       "      <td>Gone Girl</td>\n",
+       "      <td>Bicycle Thieves</td>\n",
+       "      <td>Herbie Goes Bananas</td>\n",
        "    </tr>\n",
        "  </tbody>\n",
        "</table>\n",
        "</div>"
       ],
       "text/plain": [
-       "                            top picks                       block busters  \\\n",
-       "0            The Shawshank Redemption                   Good Will Hunting   \n",
-       "1                             Yojimbo                          Annie Hall   \n",
-       "2     Monty Python and the Holy Grail  Indiana Jones and the Last Crusade   \n",
-       "3                           Big Night                        The Graduate   \n",
-       "4                     Raising Arizona                         Rear Window   \n",
-       "5                             Ed Wood                           Star Trek   \n",
-       "6                   Good Will Hunting                     American Beauty   \n",
-       "7               To Kill a Mockingbird                    Schindler's List   \n",
-       "8                          Annie Hall                      Twelve Monkeys   \n",
-       "9  Indiana Jones and the Last Crusade                  The Princess Bride   \n",
+       "                                           top picks  \\\n",
+       "0                           The Shawshank Redemption   \n",
+       "1                                    Cinema Paradiso   \n",
+       "2                                   Band of Brothers   \n",
+       "3                                    A Grand Day Out   \n",
+       "4                                      The Godfather   \n",
+       "5  The Lord of the Rings: The Fellowship of the Ring   \n",
+       "6                                   Schindler's List   \n",
+       "7                            The Empire Strikes Back   \n",
+       "8              The Lord of the Rings: The Two Towers   \n",
+       "9                                 The Usual Suspects   \n",
        "\n",
-       "                             classics            what's popular  \\\n",
-       "0                             Yojimbo  The Shawshank Redemption   \n",
-       "1     Monty Python and the Holy Grail           The Dark Knight   \n",
-       "2                     Raising Arizona              Pulp Fiction   \n",
-       "3               To Kill a Mockingbird              Blade Runner   \n",
-       "4                          Annie Hall                Fight Club   \n",
-       "5  Indiana Jones and the Last Crusade              The Avengers   \n",
-       "6                        The Graduate                  Whiplash   \n",
-       "7                         Rear Window                Big Hero 6   \n",
-       "8        The Bridge on the River Kwai                 Gone Girl   \n",
-       "9                          Roger & Me   Guardians of the Galaxy   \n",
+       "                                       block busters  \\\n",
+       "0                                      The Godfather   \n",
+       "1  The Lord of the Rings: The Fellowship of the Ring   \n",
+       "2                                   Schindler's List   \n",
+       "3                            The Empire Strikes Back   \n",
+       "4              The Lord of the Rings: The Two Towers   \n",
+       "5                                          Star Wars   \n",
+       "6                                       The Fugitive   \n",
+       "7                                         The Matrix   \n",
+       "8                                    The Dark Knight   \n",
+       "9                                       Pulp Fiction   \n",
        "\n",
-       "                       indie hits                 fruity films  \n",
-       "0                         Yojimbo           A Clockwork Orange  \n",
-       "1              My Neighbor Totoro            Pineapple Express  \n",
-       "2             The Meaning of Life  What's Eating Gilbert Grape  \n",
-       "3           Rebel Without a Cause    James and the Giant Peach  \n",
-       "4                The Professional          The Grapes of Wrath  \n",
-       "5                         Sanjuro                      Bananas  \n",
-       "6                   All About Eve      The Apple Dumpling Gang  \n",
-       "7  All Quiet on the Western Front                Orange County  \n",
-       "8         Cowboy Bebop: The Movie          Herbie Goes Bananas  \n",
-       "9                     City Lights                Adam's Apples  "
+       "                       classics            what's popular  \\\n",
+       "0               Cinema Paradiso  The Shawshank Redemption   \n",
+       "1                 The Godfather           The Dark Knight   \n",
+       "2       The Empire Strikes Back              Pulp Fiction   \n",
+       "3                     Star Wars                  Whiplash   \n",
+       "4        The Philadelphia Story              Blade Runner   \n",
+       "5  Mr. Smith Goes to Washington                Big Hero 6   \n",
+       "6             Empire of the Sun                Fight Club   \n",
+       "7                   Stand by Me              The Avengers   \n",
+       "8            The Princess Bride   Guardians of the Galaxy   \n",
+       "9       Raiders of the Lost Ark                 Gone Girl   \n",
+       "\n",
+       "              indie hits                         fruity films  \n",
+       "0                Yojimbo          What's Eating Gilbert Grape  \n",
+       "1               La Haine                  The Grapes of Wrath  \n",
+       "2            The Postman                   A Clockwork Orange  \n",
+       "3          Seven Samurai                              Bananas  \n",
+       "4                  Shine                    Pineapple Express  \n",
+       "5     My Neighbor Totoro            James and the Giant Peach  \n",
+       "6       The Professional              The Apple Dumpling Gang  \n",
+       "7          All About Eve                        Orange County  \n",
+       "8  Rebel Without a Cause  The Apple Dumpling Gang Rides Again  \n",
+       "9        Bicycle Thieves                  Herbie Goes Bananas  "
       ]
      },
      "execution_count": 17,
@@ -1441,8 +1475,9 @@
    "source": [
     "# rewrite the get_recommendations() function to use a bloom filter and apply it before we return results\n",
     "def get_unique_recommendations(user_id, filters=None, num_results=10):\n",
-    "    user_vector = user_vectors[user_id].tolist()\n",
-    "    watched_movies = ratings_df[ratings_df['userId'] == user_id]['movieId'].tolist()\n",
+    "    user_data = client.json().get(f\"user:{user_id}\")\n",
+    "    user_vector = user_data[\"user_vector\"]\n",
+    "    watched_movies = user_data[\"watched_list_ids\"]\n",
     "\n",
     "    # filter out movies that the user has already watched\n",
     "    client.bf().insert('user_watched_list', [f\"{user_id}:{movie_id}\" for movie_id in watched_movies])\n",
@@ -1523,69 +1558,62 @@
        "  <tbody>\n",
        "    <tr>\n",
        "      <th>0</th>\n",
+       "      <td>Cinema Paradiso</td>\n",
+       "      <td>Se7en</td>\n",
+       "      <td>Mr. Smith Goes to Washington</td>\n",
+       "      <td>Whiplash</td>\n",
        "      <td>Yojimbo</td>\n",
-       "      <td>Annie Hall</td>\n",
-       "      <td>To Kill a Mockingbird</td>\n",
-       "      <td>Blade Runner</td>\n",
-       "      <td>My Neighbor Totoro</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>1</th>\n",
-       "      <td>Monty Python and the Holy Grail</td>\n",
-       "      <td>The Graduate</td>\n",
-       "      <td>The Bridge on the River Kwai</td>\n",
-       "      <td>Fight Club</td>\n",
-       "      <td>The Meaning of Life</td>\n",
+       "      <td>A Grand Day Out</td>\n",
+       "      <td>Stand by Me</td>\n",
+       "      <td>Empire of the Sun</td>\n",
+       "      <td>Blade Runner</td>\n",
+       "      <td>La Haine</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>2</th>\n",
-       "      <td>Big Night</td>\n",
-       "      <td>Rear Window</td>\n",
-       "      <td>Roger &amp; Me</td>\n",
-       "      <td>Whiplash</td>\n",
-       "      <td>Rebel Without a Cause</td>\n",
+       "      <td>The Godfather</td>\n",
+       "      <td>The Prestige</td>\n",
+       "      <td>The Godfather: Part II</td>\n",
+       "      <td>Big Hero 6</td>\n",
+       "      <td>The Postman</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>3</th>\n",
-       "      <td>Raising Arizona</td>\n",
-       "      <td>American Beauty</td>\n",
-       "      <td>Cinema Paradiso</td>\n",
-       "      <td>Big Hero 6</td>\n",
-       "      <td>The Professional</td>\n",
+       "      <td>The Usual Suspects</td>\n",
+       "      <td>The Princess Bride</td>\n",
+       "      <td>Roger &amp; Me</td>\n",
+       "      <td>Fight Club</td>\n",
+       "      <td>Seven Samurai</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>4</th>\n",
-       "      <td>Ed Wood</td>\n",
-       "      <td>Twelve Monkeys</td>\n",
-       "      <td>Dr. Strangelove or: How I Learned to Stop Worr...</td>\n",
+       "      <td>The Philadelphia Story</td>\n",
+       "      <td>Rain Man</td>\n",
+       "      <td>It Happened One Night</td>\n",
        "      <td>Gone Girl</td>\n",
-       "      <td>Sanjuro</td>\n",
+       "      <td>Shine</td>\n",
        "    </tr>\n",
        "  </tbody>\n",
        "</table>\n",
        "</div>"
       ],
       "text/plain": [
-       "                         top picks    block busters  \\\n",
-       "0                          Yojimbo       Annie Hall   \n",
-       "1  Monty Python and the Holy Grail     The Graduate   \n",
-       "2                        Big Night      Rear Window   \n",
-       "3                  Raising Arizona  American Beauty   \n",
-       "4                          Ed Wood   Twelve Monkeys   \n",
+       "                top picks       block busters                      classics  \\\n",
+       "0         Cinema Paradiso               Se7en  Mr. Smith Goes to Washington   \n",
+       "1         A Grand Day Out         Stand by Me             Empire of the Sun   \n",
+       "2           The Godfather        The Prestige        The Godfather: Part II   \n",
+       "3      The Usual Suspects  The Princess Bride                    Roger & Me   \n",
+       "4  The Philadelphia Story            Rain Man         It Happened One Night   \n",
        "\n",
-       "                                            classics what's popular  \\\n",
-       "0                              To Kill a Mockingbird   Blade Runner   \n",
-       "1                       The Bridge on the River Kwai     Fight Club   \n",
-       "2                                         Roger & Me       Whiplash   \n",
-       "3                                    Cinema Paradiso     Big Hero 6   \n",
-       "4  Dr. Strangelove or: How I Learned to Stop Worr...      Gone Girl   \n",
-       "\n",
-       "              indie hits  \n",
-       "0     My Neighbor Totoro  \n",
-       "1    The Meaning of Life  \n",
-       "2  Rebel Without a Cause  \n",
-       "3       The Professional  \n",
-       "4                Sanjuro  "
+       "  what's popular     indie hits  \n",
+       "0       Whiplash        Yojimbo  \n",
+       "1   Blade Runner       La Haine  \n",
+       "2     Big Hero 6    The Postman  \n",
+       "3     Fight Club  Seven Samurai  \n",
+       "4      Gone Girl          Shine  "
       ]
      },
      "execution_count": 19,
@@ -1602,7 +1630,7 @@
     "all_recommendations[\"what's popular\"] = [m[0] for m in whats_popular]\n",
     "all_recommendations[\"indie hits\"] = [m[0] for m in indie_hits]\n",
     "\n",
-    "all_recommendations.head(10)"
+    "all_recommendations.head()"
    ]
   },
   {
@@ -1622,7 +1650,7 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Deleted 4337 keys\n",
+      "Deleted 4370 keys\n",
       "Deleted 2000 keys\n",
       "Deleted 1000 keys\n",
       "Deleted 500 keys\n",
@@ -1632,7 +1660,7 @@
     {
      "data": {
       "text/plain": [
-       "1"
+       "671"
       ]
      },
      "execution_count": 20,
@@ -1645,7 +1673,8 @@
     "while remaining := movie_index.clear():\n",
     "    print(f\"Deleted {remaining} keys\")\n",
     "\n",
-    "client.delete(\"user_watched_list\")"
+    "client.delete(\"user_watched_list\")\n",
+    "client.delete(*[f\"user:{user_id}\" for user_id in user_vectors_and_ids.keys()])"
    ]
   }
  ],

From 9eb40678785ff0fc4c3f1bb5f5b222026ded1e35 Mon Sep 17 00:00:00 2001
From: Justin Cechmanek <justin.cechmanek@redis.com>
Date: Fri, 18 Oct 2024 12:09:19 -0700
Subject: [PATCH 12/12] computes predicted rating from vector distance. Updates
 comments about tags vs bloom filters

---
 .../collaborative_filtering.ipynb             | 354 +++++++++---------
 1 file changed, 179 insertions(+), 175 deletions(-)

diff --git a/python-recipes/recommendation-systems/collaborative_filtering.ipynb b/python-recipes/recommendation-systems/collaborative_filtering.ipynb
index 0a00bbd1..e96054d3 100644
--- a/python-recipes/recommendation-systems/collaborative_filtering.ipynb
+++ b/python-recipes/recommendation-systems/collaborative_filtering.ipynb
@@ -180,7 +180,7 @@
     {
      "data": {
       "text/plain": [
-       "<surprise.prediction_algorithms.matrix_factorization.SVD at 0x106510e10>"
+       "<surprise.prediction_algorithms.matrix_factorization.SVD at 0x11dbf2410>"
       ]
      },
      "execution_count": 5,
@@ -210,7 +210,7 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "Now that the the SVD algorithm has computed our `[U]` and `[M]` matrices - which are both really just lists of vectors - we can load them into our Redis instance.\n",
+    "Now that the SVD algorithm has computed our `[U]` and `[M]` matrices - which are both really just lists of vectors - we can load them into our Redis instance.\n",
     "\n",
     "The Surprise SVD model stores user and movie vectors in two attributes:\n",
     "\n",
@@ -230,7 +230,7 @@
      "output_type": "stream",
      "text": [
       "we have 671 users with feature vectors of size 100\n",
-      "we have 8415 movies with feature vectors of size 100\n"
+      "we have 8397 movies with feature vectors of size 100\n"
      ]
     }
    ],
@@ -263,7 +263,7 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "the predicted rating of user 347 on movie 5515 is 1.5939846458534452\n"
+      "the predicted rating of user 347 on movie 5515 is 1.1069607933289707\n"
      ]
     }
    ],
@@ -690,7 +690,7 @@
        "      <td>1</td>\n",
        "      <td>114709</td>\n",
        "      <td>862.0</td>\n",
-       "      <td>[-0.12329348744399116, -0.03395287506133206, 0...</td>\n",
+       "      <td>[0.12184447241197785, -0.16994406060791697, 0....</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>1</th>\n",
@@ -712,7 +712,7 @@
        "      <td>2</td>\n",
        "      <td>113497</td>\n",
        "      <td>8844.0</td>\n",
-       "      <td>[-0.20839075686685218, 0.2842778495633789, 0.2...</td>\n",
+       "      <td>[0.14683581574270926, -0.06365576587872183, 0....</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>2</th>\n",
@@ -734,7 +734,7 @@
        "      <td>3</td>\n",
        "      <td>113228</td>\n",
        "      <td>15602.0</td>\n",
-       "      <td>[-0.3250115780939791, 0.11093873287053337, 0.4...</td>\n",
+       "      <td>[0.16698051985699827, -0.02406109383254372, 0....</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>3</th>\n",
@@ -756,7 +756,7 @@
        "      <td>4</td>\n",
        "      <td>114885</td>\n",
        "      <td>31357.0</td>\n",
-       "      <td>[-0.08088437767077983, 0.1911468768682881, 0.2...</td>\n",
+       "      <td>[-0.10740791019437969, 0.09007945525146789, 0....</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>4</th>\n",
@@ -778,7 +778,7 @@
        "      <td>5</td>\n",
        "      <td>113041</td>\n",
        "      <td>11862.0</td>\n",
-       "      <td>[-0.007213409719480573, 0.20232376643634847, 0...</td>\n",
+       "      <td>[0.11311012532803581, 0.025998675845395405, 0....</td>\n",
        "    </tr>\n",
        "  </tbody>\n",
        "</table>\n",
@@ -821,11 +821,11 @@
        "4  Father of the Bride Part II           5.7         173       5  113041   \n",
        "\n",
        "    tmdbId                                       movie_vector  \n",
-       "0    862.0  [-0.12329348744399116, -0.03395287506133206, 0...  \n",
-       "1   8844.0  [-0.20839075686685218, 0.2842778495633789, 0.2...  \n",
-       "2  15602.0  [-0.3250115780939791, 0.11093873287053337, 0.4...  \n",
-       "3  31357.0  [-0.08088437767077983, 0.1911468768682881, 0.2...  \n",
-       "4  11862.0  [-0.007213409719480573, 0.20232376643634847, 0...  "
+       "0    862.0  [0.12184447241197785, -0.16994406060791697, 0....  \n",
+       "1   8844.0  [0.14683581574270926, -0.06365576587872183, 0....  \n",
+       "2  15602.0  [0.16698051985699827, -0.02406109383254372, 0....  \n",
+       "3  31357.0  [-0.10740791019437969, 0.09007945525146789, 0....  \n",
+       "4  11862.0  [0.11311012532803581, 0.025998675845395405, 0....  "
       ]
      },
      "execution_count": 11,
@@ -867,7 +867,7 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "15:07:36 redisvl.index.index INFO   Index already exists, overwriting.\n"
+      "12:05:35 redisvl.index.index INFO   Index already exists, overwriting.\n"
      ]
     }
    ],
@@ -895,10 +895,10 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "number of movies 8370\n",
-      "size of movie df 8370\n",
-      "unique movie ids 8364\n",
-      "unique movie titles 8125\n",
+      "number of movies 8358\n",
+      "size of movie df 8358\n",
+      "unique movie ids 8352\n",
+      "unique movie titles 8115\n",
       "unique movies rated 9065\n"
      ]
     },
@@ -965,7 +965,7 @@
        "      <td>1</td>\n",
        "      <td>114709</td>\n",
        "      <td>862.0</td>\n",
-       "      <td>[-0.12329348744399116, -0.03395287506133206, 0...</td>\n",
+       "      <td>[0.12184447241197785, -0.16994406060791697, 0....</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>1</th>\n",
@@ -987,7 +987,7 @@
        "      <td>2</td>\n",
        "      <td>113497</td>\n",
        "      <td>8844.0</td>\n",
-       "      <td>[-0.20839075686685218, 0.2842778495633789, 0.2...</td>\n",
+       "      <td>[0.14683581574270926, -0.06365576587872183, 0....</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>2</th>\n",
@@ -1009,7 +1009,7 @@
        "      <td>3</td>\n",
        "      <td>113228</td>\n",
        "      <td>15602.0</td>\n",
-       "      <td>[-0.3250115780939791, 0.11093873287053337, 0.4...</td>\n",
+       "      <td>[0.16698051985699827, -0.02406109383254372, 0....</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>3</th>\n",
@@ -1031,7 +1031,7 @@
        "      <td>4</td>\n",
        "      <td>114885</td>\n",
        "      <td>31357.0</td>\n",
-       "      <td>[-0.08088437767077983, 0.1911468768682881, 0.2...</td>\n",
+       "      <td>[-0.10740791019437969, 0.09007945525146789, 0....</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>4</th>\n",
@@ -1053,7 +1053,7 @@
        "      <td>5</td>\n",
        "      <td>113041</td>\n",
        "      <td>11862.0</td>\n",
-       "      <td>[-0.007213409719480573, 0.20232376643634847, 0...</td>\n",
+       "      <td>[0.11311012532803581, 0.025998675845395405, 0....</td>\n",
        "    </tr>\n",
        "  </tbody>\n",
        "</table>\n",
@@ -1096,11 +1096,11 @@
        "4  Father of the Bride Part II           5.7         173       5  113041   \n",
        "\n",
        "    tmdbId                                       movie_vector  \n",
-       "0    862.0  [-0.12329348744399116, -0.03395287506133206, 0...  \n",
-       "1   8844.0  [-0.20839075686685218, 0.2842778495633789, 0.2...  \n",
-       "2  15602.0  [-0.3250115780939791, 0.11093873287053337, 0.4...  \n",
-       "3  31357.0  [-0.08088437767077983, 0.1911468768682881, 0.2...  \n",
-       "4  11862.0  [-0.007213409719480573, 0.20232376643634847, 0...  "
+       "0    862.0  [0.12184447241197785, -0.16994406060791697, 0....  \n",
+       "1   8844.0  [0.14683581574270926, -0.06365576587872183, 0....  \n",
+       "2  15602.0  [0.16698051985699827, -0.02406109383254372, 0....  \n",
+       "3  31357.0  [-0.10740791019437969, 0.09007945525146789, 0....  \n",
+       "4  11862.0  [0.11311012532803581, 0.025998675845395405, 0....  "
       ]
      },
      "execution_count": 13,
@@ -1176,18 +1176,18 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "{'id': 'movie:255865ce253c4b7bbefaff7884035b0c', 'vector_distance': '-3.8687338829', 'title': 'Spirited Away', 'genres': '[\"Fantasy\",\"Adventure\",\"Animation\",\"Family\"]'}\n",
-      "{'id': 'movie:c833029c842143fdaf7bb5acedb051ce', 'vector_distance': '-3.73652648926', 'title': 'The Princess Bride', 'genres': '[\"Adventure\",\"Family\",\"Fantasy\",\"Comedy\",\"Romance\"]'}\n",
-      "{'id': 'movie:cf48a5443467433ca57c1741104cf123', 'vector_distance': '-3.66395378113', 'title': 'The Usual Suspects', 'genres': '[\"Drama\",\"Crime\",\"Thriller\"]'}\n",
-      "{'id': 'movie:a8707fc2440043a78e1b7ee92c5038cf', 'vector_distance': '-3.62124490738', 'title': 'The Shawshank Redemption', 'genres': '[\"Drama\",\"Crime\"]'}\n",
-      "{'id': 'movie:772b299da4e8427082e13fc542c80a9e', 'vector_distance': '-3.59598970413', 'title': 'A Beautiful Mind', 'genres': '[\"Drama\",\"Romance\"]'}\n",
-      "{'id': 'movie:9818b2cc529f4ef8af6b1e42618c7e19', 'vector_distance': '-3.57971763611', 'title': 'Cinema Paradiso', 'genres': '[\"Drama\",\"Romance\"]'}\n",
-      "{'id': 'movie:cd43f8fee7024fc0a3edd2cb155491cf', 'vector_distance': '-3.54007005692', 'title': 'The Empire Strikes Back', 'genres': '[\"Adventure\",\"Action\",\"Science Fiction\"]'}\n",
-      "{'id': 'movie:ca1c554ca7ef4da29e1a543147da54d9', 'vector_distance': '-3.53854608536', 'title': 'Like Water for Chocolate', 'genres': '[\"Drama\",\"Romance\"]'}\n",
-      "{'id': 'movie:f03df438a38349a4992222b6d37e81eb', 'vector_distance': '-3.4644536972', 'title': 'Roger & Me', 'genres': '[\"Documentary\",\"History\"]'}\n",
-      "{'id': 'movie:e1b6f8ad41d2425a8b470f0d206038bf', 'vector_distance': '-3.45273590088', 'title': 'The Lord of the Rings: The Fellowship of the Ring', 'genres': '[\"Adventure\",\"Fantasy\",\"Action\"]'}\n",
-      "{'id': 'movie:1d2091b80efd4052b2c54390f8f25172', 'vector_distance': '-3.44259595871', 'title': 'Star Wars', 'genres': '[\"Adventure\",\"Action\",\"Science Fiction\"]'}\n",
-      "{'id': 'movie:b41bb158cd0b4362955e3800fd2cfb9d', 'vector_distance': '-3.40954303741', 'title': 'The Lord of the Rings: The Two Towers', 'genres': '[\"Adventure\",\"Fantasy\",\"Action\"]'}\n"
+      "vector distance: -3.63527393,\t predicted rating: 4.63527393,\t title: Fight Club, \n",
+      "vector distance: -3.60445881,\t predicted rating: 4.60445881,\t title: All About Eve, \n",
+      "vector distance: -3.60197020,\t predicted rating: 4.60197020,\t title: Lock, Stock and Two Smoking Barrels, \n",
+      "vector distance: -3.59518766,\t predicted rating: 4.59518766,\t title: Midnight in Paris, \n",
+      "vector distance: -3.58543396,\t predicted rating: 4.58543396,\t title: It Happened One Night, \n",
+      "vector distance: -3.54092789,\t predicted rating: 4.54092789,\t title: Anne Frank Remembered, \n",
+      "vector distance: -3.51044893,\t predicted rating: 4.51044893,\t title: Pulp Fiction, \n",
+      "vector distance: -3.50941706,\t predicted rating: 4.50941706,\t title: Raging Bull, \n",
+      "vector distance: -3.49180365,\t predicted rating: 4.49180365,\t title: Cool Hand Luke, \n",
+      "vector distance: -3.47437143,\t predicted rating: 4.47437143,\t title: Rear Window, \n",
+      "vector distance: -3.41378117,\t predicted rating: 4.41378117,\t title: The Usual Suspects, \n",
+      "vector distance: -3.40533876,\t predicted rating: 4.40533876,\t title: Princess Mononoke, \n"
      ]
     }
    ],
@@ -1208,7 +1208,9 @@
     "results = movie_index.query(query)\n",
     "\n",
     "for r in results:\n",
-    "    print(r)"
+    "    # compute our predicted rating on a scale of 0 to 5 from our vector distance (rating = 1 - distance)\n",
+    "    r['predicted_rating'] = - float(r['vector_distance']) + 1.\n",
+    "    print(f\"vector distance: {float(r['vector_distance']):.08f},\\t predicted rating: {r['predicted_rating']:.08f},\\t title: {r['title']}, \")"
    ]
   },
   {
@@ -1298,145 +1300,145 @@
        "    <tr>\n",
        "      <th>0</th>\n",
        "      <td>The Shawshank Redemption</td>\n",
-       "      <td>The Godfather</td>\n",
+       "      <td>Forrest Gump</td>\n",
        "      <td>Cinema Paradiso</td>\n",
        "      <td>The Shawshank Redemption</td>\n",
-       "      <td>Yojimbo</td>\n",
+       "      <td>Castle in the Sky</td>\n",
        "      <td>What's Eating Gilbert Grape</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>1</th>\n",
-       "      <td>Cinema Paradiso</td>\n",
-       "      <td>The Lord of the Rings: The Fellowship of the Ring</td>\n",
-       "      <td>The Godfather</td>\n",
-       "      <td>The Dark Knight</td>\n",
-       "      <td>La Haine</td>\n",
-       "      <td>The Grapes of Wrath</td>\n",
+       "      <td>Forrest Gump</td>\n",
+       "      <td>The Silence of the Lambs</td>\n",
+       "      <td>The African Queen</td>\n",
+       "      <td>Pulp Fiction</td>\n",
+       "      <td>My Neighbor Totoro</td>\n",
+       "      <td>A Clockwork Orange</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>2</th>\n",
-       "      <td>Band of Brothers</td>\n",
-       "      <td>Schindler's List</td>\n",
-       "      <td>The Empire Strikes Back</td>\n",
+       "      <td>Cinema Paradiso</td>\n",
        "      <td>Pulp Fiction</td>\n",
-       "      <td>The Postman</td>\n",
-       "      <td>A Clockwork Orange</td>\n",
+       "      <td>Raiders of the Lost Ark</td>\n",
+       "      <td>The Dark Knight</td>\n",
+       "      <td>All Quiet on the Western Front</td>\n",
+       "      <td>The Grapes of Wrath</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>3</th>\n",
-       "      <td>A Grand Day Out</td>\n",
+       "      <td>Lock, Stock and Two Smoking Barrels</td>\n",
+       "      <td>Raiders of the Lost Ark</td>\n",
        "      <td>The Empire Strikes Back</td>\n",
-       "      <td>Star Wars</td>\n",
-       "      <td>Whiplash</td>\n",
-       "      <td>Seven Samurai</td>\n",
-       "      <td>Bananas</td>\n",
+       "      <td>Fight Club</td>\n",
+       "      <td>Army of Darkness</td>\n",
+       "      <td>Pineapple Express</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>4</th>\n",
-       "      <td>The Godfather</td>\n",
-       "      <td>The Lord of the Rings: The Two Towers</td>\n",
-       "      <td>The Philadelphia Story</td>\n",
-       "      <td>Blade Runner</td>\n",
-       "      <td>Shine</td>\n",
-       "      <td>Pineapple Express</td>\n",
+       "      <td>The African Queen</td>\n",
+       "      <td>The Empire Strikes Back</td>\n",
+       "      <td>Indiana Jones and the Last Crusade</td>\n",
+       "      <td>Whiplash</td>\n",
+       "      <td>All About Eve</td>\n",
+       "      <td>James and the Giant Peach</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>5</th>\n",
-       "      <td>The Lord of the Rings: The Fellowship of the Ring</td>\n",
+       "      <td>The Silence of the Lambs</td>\n",
+       "      <td>Indiana Jones and the Last Crusade</td>\n",
        "      <td>Star Wars</td>\n",
-       "      <td>Mr. Smith Goes to Washington</td>\n",
-       "      <td>Big Hero 6</td>\n",
-       "      <td>My Neighbor Totoro</td>\n",
-       "      <td>James and the Giant Peach</td>\n",
+       "      <td>Blade Runner</td>\n",
+       "      <td>The Professional</td>\n",
+       "      <td>Bananas</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>6</th>\n",
+       "      <td>Pulp Fiction</td>\n",
        "      <td>Schindler's List</td>\n",
-       "      <td>The Fugitive</td>\n",
-       "      <td>Empire of the Sun</td>\n",
-       "      <td>Fight Club</td>\n",
-       "      <td>The Professional</td>\n",
-       "      <td>The Apple Dumpling Gang</td>\n",
+       "      <td>The Manchurian Candidate</td>\n",
+       "      <td>The Avengers</td>\n",
+       "      <td>Shine</td>\n",
+       "      <td>Orange County</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>7</th>\n",
-       "      <td>The Empire Strikes Back</td>\n",
-       "      <td>The Matrix</td>\n",
-       "      <td>Stand by Me</td>\n",
-       "      <td>The Avengers</td>\n",
-       "      <td>All About Eve</td>\n",
-       "      <td>Orange County</td>\n",
+       "      <td>Raiders of the Lost Ark</td>\n",
+       "      <td>The Lord of the Rings: The Return of the King</td>\n",
+       "      <td>The Godfather: Part II</td>\n",
+       "      <td>Guardians of the Galaxy</td>\n",
+       "      <td>Yojimbo</td>\n",
+       "      <td>Herbie Goes Bananas</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>8</th>\n",
+       "      <td>The Empire Strikes Back</td>\n",
        "      <td>The Lord of the Rings: The Two Towers</td>\n",
-       "      <td>The Dark Knight</td>\n",
-       "      <td>The Princess Bride</td>\n",
-       "      <td>Guardians of the Galaxy</td>\n",
-       "      <td>Rebel Without a Cause</td>\n",
-       "      <td>The Apple Dumpling Gang Rides Again</td>\n",
+       "      <td>Castle in the Sky</td>\n",
+       "      <td>Gone Girl</td>\n",
+       "      <td>Belle de Jour</td>\n",
+       "      <td>The Apple Dumpling Gang</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>9</th>\n",
-       "      <td>The Usual Suspects</td>\n",
-       "      <td>Pulp Fiction</td>\n",
-       "      <td>Raiders of the Lost Ark</td>\n",
-       "      <td>Gone Girl</td>\n",
-       "      <td>Bicycle Thieves</td>\n",
-       "      <td>Herbie Goes Bananas</td>\n",
+       "      <td>Indiana Jones and the Last Crusade</td>\n",
+       "      <td>Terminator 2: Judgment Day</td>\n",
+       "      <td>Back to the Future</td>\n",
+       "      <td>Big Hero 6</td>\n",
+       "      <td>Local Hero</td>\n",
+       "      <td>Adam's Apples</td>\n",
        "    </tr>\n",
        "  </tbody>\n",
        "</table>\n",
        "</div>"
       ],
       "text/plain": [
-       "                                           top picks  \\\n",
-       "0                           The Shawshank Redemption   \n",
-       "1                                    Cinema Paradiso   \n",
-       "2                                   Band of Brothers   \n",
-       "3                                    A Grand Day Out   \n",
-       "4                                      The Godfather   \n",
-       "5  The Lord of the Rings: The Fellowship of the Ring   \n",
-       "6                                   Schindler's List   \n",
-       "7                            The Empire Strikes Back   \n",
-       "8              The Lord of the Rings: The Two Towers   \n",
-       "9                                 The Usual Suspects   \n",
+       "                             top picks  \\\n",
+       "0             The Shawshank Redemption   \n",
+       "1                         Forrest Gump   \n",
+       "2                      Cinema Paradiso   \n",
+       "3  Lock, Stock and Two Smoking Barrels   \n",
+       "4                    The African Queen   \n",
+       "5             The Silence of the Lambs   \n",
+       "6                         Pulp Fiction   \n",
+       "7              Raiders of the Lost Ark   \n",
+       "8              The Empire Strikes Back   \n",
+       "9   Indiana Jones and the Last Crusade   \n",
        "\n",
-       "                                       block busters  \\\n",
-       "0                                      The Godfather   \n",
-       "1  The Lord of the Rings: The Fellowship of the Ring   \n",
-       "2                                   Schindler's List   \n",
-       "3                            The Empire Strikes Back   \n",
-       "4              The Lord of the Rings: The Two Towers   \n",
-       "5                                          Star Wars   \n",
-       "6                                       The Fugitive   \n",
-       "7                                         The Matrix   \n",
-       "8                                    The Dark Knight   \n",
-       "9                                       Pulp Fiction   \n",
+       "                                   block busters  \\\n",
+       "0                                   Forrest Gump   \n",
+       "1                       The Silence of the Lambs   \n",
+       "2                                   Pulp Fiction   \n",
+       "3                        Raiders of the Lost Ark   \n",
+       "4                        The Empire Strikes Back   \n",
+       "5             Indiana Jones and the Last Crusade   \n",
+       "6                               Schindler's List   \n",
+       "7  The Lord of the Rings: The Return of the King   \n",
+       "8          The Lord of the Rings: The Two Towers   \n",
+       "9                     Terminator 2: Judgment Day   \n",
        "\n",
-       "                       classics            what's popular  \\\n",
-       "0               Cinema Paradiso  The Shawshank Redemption   \n",
-       "1                 The Godfather           The Dark Knight   \n",
-       "2       The Empire Strikes Back              Pulp Fiction   \n",
-       "3                     Star Wars                  Whiplash   \n",
-       "4        The Philadelphia Story              Blade Runner   \n",
-       "5  Mr. Smith Goes to Washington                Big Hero 6   \n",
-       "6             Empire of the Sun                Fight Club   \n",
-       "7                   Stand by Me              The Avengers   \n",
-       "8            The Princess Bride   Guardians of the Galaxy   \n",
-       "9       Raiders of the Lost Ark                 Gone Girl   \n",
+       "                             classics            what's popular  \\\n",
+       "0                     Cinema Paradiso  The Shawshank Redemption   \n",
+       "1                   The African Queen              Pulp Fiction   \n",
+       "2             Raiders of the Lost Ark           The Dark Knight   \n",
+       "3             The Empire Strikes Back                Fight Club   \n",
+       "4  Indiana Jones and the Last Crusade                  Whiplash   \n",
+       "5                           Star Wars              Blade Runner   \n",
+       "6            The Manchurian Candidate              The Avengers   \n",
+       "7              The Godfather: Part II   Guardians of the Galaxy   \n",
+       "8                   Castle in the Sky                 Gone Girl   \n",
+       "9                  Back to the Future                Big Hero 6   \n",
        "\n",
-       "              indie hits                         fruity films  \n",
-       "0                Yojimbo          What's Eating Gilbert Grape  \n",
-       "1               La Haine                  The Grapes of Wrath  \n",
-       "2            The Postman                   A Clockwork Orange  \n",
-       "3          Seven Samurai                              Bananas  \n",
-       "4                  Shine                    Pineapple Express  \n",
-       "5     My Neighbor Totoro            James and the Giant Peach  \n",
-       "6       The Professional              The Apple Dumpling Gang  \n",
-       "7          All About Eve                        Orange County  \n",
-       "8  Rebel Without a Cause  The Apple Dumpling Gang Rides Again  \n",
-       "9        Bicycle Thieves                  Herbie Goes Bananas  "
+       "                       indie hits                 fruity films  \n",
+       "0               Castle in the Sky  What's Eating Gilbert Grape  \n",
+       "1              My Neighbor Totoro           A Clockwork Orange  \n",
+       "2  All Quiet on the Western Front          The Grapes of Wrath  \n",
+       "3                Army of Darkness            Pineapple Express  \n",
+       "4                   All About Eve    James and the Giant Peach  \n",
+       "5                The Professional                      Bananas  \n",
+       "6                           Shine                Orange County  \n",
+       "7                         Yojimbo          Herbie Goes Bananas  \n",
+       "8                   Belle de Jour      The Apple Dumpling Gang  \n",
+       "9                      Local Hero                Adam's Apples  "
       ]
      },
      "execution_count": 17,
@@ -1464,6 +1466,8 @@
     "## Keeping Things Fresh\n",
     "You've probably noticed that a few movies get repeated in these lists. That's not surprising as all our results are personalized and things like `popularity` and `user_rating` and `revenue` are likely highly correlated. And it's more than likely that at least some of the recommendations we're expecting to be highly rated by a given user are ones they've already watched and rated highly.\n",
     "\n",
+    "We need a way to filter out movies that a user has already seen, as well as movies that we've already recommended to them.\n",
+    "We could use a Tag filter on our queries to filter out movies by their id, but this gets cumbersome quickly.\n",
     "Luckily Redis offers an easy answer to keeping recommendations new and interesting, and that answer is Bloom Filters."
    ]
   },
@@ -1479,12 +1483,12 @@
     "    user_vector = user_data[\"user_vector\"]\n",
     "    watched_movies = user_data[\"watched_list_ids\"]\n",
     "\n",
-    "    # filter out movies that the user has already watched\n",
+    "    # record this user's watched movies in the Bloom Filter so they can be filtered out of the recommendations\n",
     "    client.bf().insert('user_watched_list', [f\"{user_id}:{movie_id}\" for movie_id in watched_movies])\n",
     "\n",
     "    query = RangeQuery(vector=user_vector,\n",
     "                       vector_field_name='movie_vector',\n",
-    "                       num_results=num_results * 5,  # fetch more results to filter out watched movies\n",
+    "                       num_results=num_results * 5,  # fetch more results to account for watched movies\n",
     "                       filter_expression=filters,\n",
     "                       return_fields=['title', 'overview', 'genres', 'movieId'],\n",
     "    )\n",
@@ -1502,7 +1506,7 @@
     "    return recommendations\n",
     "\n",
     "# example usage\n",
-    "# create a bloom filter for this user\n",
+    "# create a bloom filter for all our users\n",
     "try:\n",
     "    client.bf().create(f\"user_watched_list\", 0.01, 10000)\n",
     "except Exception as e:\n",
@@ -1559,41 +1563,41 @@
        "    <tr>\n",
        "      <th>0</th>\n",
        "      <td>Cinema Paradiso</td>\n",
-       "      <td>Se7en</td>\n",
-       "      <td>Mr. Smith Goes to Washington</td>\n",
-       "      <td>Whiplash</td>\n",
-       "      <td>Yojimbo</td>\n",
+       "      <td>The Manchurian Candidate</td>\n",
+       "      <td>Castle in the Sky</td>\n",
+       "      <td>Fight Club</td>\n",
+       "      <td>All Quiet on the Western Front</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>1</th>\n",
-       "      <td>A Grand Day Out</td>\n",
-       "      <td>Stand by Me</td>\n",
-       "      <td>Empire of the Sun</td>\n",
-       "      <td>Blade Runner</td>\n",
-       "      <td>La Haine</td>\n",
+       "      <td>Lock, Stock and Two Smoking Barrels</td>\n",
+       "      <td>Toy Story</td>\n",
+       "      <td>12 Angry Men</td>\n",
+       "      <td>Whiplash</td>\n",
+       "      <td>Army of Darkness</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>2</th>\n",
-       "      <td>The Godfather</td>\n",
-       "      <td>The Prestige</td>\n",
+       "      <td>The African Queen</td>\n",
        "      <td>The Godfather: Part II</td>\n",
-       "      <td>Big Hero 6</td>\n",
-       "      <td>The Postman</td>\n",
+       "      <td>My Neighbor Totoro</td>\n",
+       "      <td>Blade Runner</td>\n",
+       "      <td>All About Eve</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>3</th>\n",
-       "      <td>The Usual Suspects</td>\n",
-       "      <td>The Princess Bride</td>\n",
-       "      <td>Roger &amp; Me</td>\n",
-       "      <td>Fight Club</td>\n",
-       "      <td>Seven Samurai</td>\n",
+       "      <td>The Silence of the Lambs</td>\n",
+       "      <td>Back to the Future</td>\n",
+       "      <td>It Happened One Night</td>\n",
+       "      <td>Gone Girl</td>\n",
+       "      <td>The Professional</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>4</th>\n",
-       "      <td>The Philadelphia Story</td>\n",
-       "      <td>Rain Man</td>\n",
-       "      <td>It Happened One Night</td>\n",
-       "      <td>Gone Girl</td>\n",
+       "      <td>Eat Drink Man Woman</td>\n",
+       "      <td>The Godfather</td>\n",
+       "      <td>Stand by Me</td>\n",
+       "      <td>Big Hero 6</td>\n",
        "      <td>Shine</td>\n",
        "    </tr>\n",
        "  </tbody>\n",
@@ -1601,19 +1605,19 @@
        "</div>"
       ],
       "text/plain": [
-       "                top picks       block busters                      classics  \\\n",
-       "0         Cinema Paradiso               Se7en  Mr. Smith Goes to Washington   \n",
-       "1         A Grand Day Out         Stand by Me             Empire of the Sun   \n",
-       "2           The Godfather        The Prestige        The Godfather: Part II   \n",
-       "3      The Usual Suspects  The Princess Bride                    Roger & Me   \n",
-       "4  The Philadelphia Story            Rain Man         It Happened One Night   \n",
+       "                             top picks             block busters  \\\n",
+       "0                      Cinema Paradiso  The Manchurian Candidate   \n",
+       "1  Lock, Stock and Two Smoking Barrels                 Toy Story   \n",
+       "2                    The African Queen    The Godfather: Part II   \n",
+       "3             The Silence of the Lambs        Back to the Future   \n",
+       "4                  Eat Drink Man Woman             The Godfather   \n",
        "\n",
-       "  what's popular     indie hits  \n",
-       "0       Whiplash        Yojimbo  \n",
-       "1   Blade Runner       La Haine  \n",
-       "2     Big Hero 6    The Postman  \n",
-       "3     Fight Club  Seven Samurai  \n",
-       "4      Gone Girl          Shine  "
+       "                classics what's popular                      indie hits  \n",
+       "0      Castle in the Sky     Fight Club  All Quiet on the Western Front  \n",
+       "1           12 Angry Men       Whiplash                Army of Darkness  \n",
+       "2     My Neighbor Totoro   Blade Runner                   All About Eve  \n",
+       "3  It Happened One Night      Gone Girl                The Professional  \n",
+       "4            Stand by Me     Big Hero 6                           Shine  "
       ]
      },
      "execution_count": 19,
@@ -1650,7 +1654,7 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Deleted 4370 keys\n",
+      "Deleted 4358 keys\n",
       "Deleted 2000 keys\n",
       "Deleted 1000 keys\n",
       "Deleted 500 keys\n",