README.md (3 additions, 3 deletions)
@@ -22,7 +22,7 @@ We developed this exhibit to create an interactive serverless application using
 -**Personalized product information**: Curious about what is in a product and if it is good for you?
 Just scan the barcode with the app for an explained list of ingredients/allergens and a personalized summary based on your preferences, health goals, and dietary restrictions. The app provides direct allergen detection and quantitative nutritional analysis using data from Open Food Facts.
 
--**Personalized recipe generator**: Capture a photo of the ingredients in your fridge, and the app will generate recipes based on your preferences using those ingredients.
+-**Personalized recipe generator**: Capture multiple photos of ingredients in your fridge and pantry, and the app will generate recipes based on your preferences using those ingredients.
 
 
 ## Demo
@@ -129,9 +129,9 @@ The architecture of the application can be split in 4 blocks:
 
 #### Food aliment detection
 
--**Strategy**: Extract ingredients from the image, works well on fruits and vegetables.
+-**Strategy**: Extract ingredients from multiple images, works well on fruits and vegetables. Users can capture their entire fridge and pantry to enable comprehensive meal planning.
 
--**Implementation**: We use Anthropic Claude 3 Sonnet on Amazon Bedrock with its vision capabilities to extract only food elements from the image. This allows us to focus on the food elements and ignore the background or other elements in the image. Claude 3 is a multi-modal model that can handle both text and images. The output is a list of ingredients present in the image.
+-**Implementation**: We use Anthropic Claude 3 Sonnet on Amazon Bedrock with its vision capabilities to extract only food elements from the images. This allows us to focus on the food elements and ignore the background or other elements in the images. Claude 3 is a multi-modal model that can handle both text and images. The output is a list of ingredients present across all captured images. The backend processes multiple images via the `list_images_base64` array parameter.
 
 -**Prompt Engineering**: To exploit the full potential of the model, we use a system prompt. A system prompt is a way to provide context, instructions, and guidelines to Claude before presenting it with a question or task. By using a system prompt, you can set the stage for the conversation, specifying Claude's role, personality, tone, or any other relevant information that will help it to better understand and respond to the user's input.
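
For reference, a minimal sketch of what the multi-image call described in this diff could look like on the backend, assuming a Python Lambda using boto3 and the Anthropic Messages format on Amazon Bedrock. Only the `list_images_base64` parameter name comes from the README; the function name `extract_ingredients`, the system prompt wording, the JPEG media type, and the response parsing are illustrative assumptions, not the repository's actual code.

```python
# Hypothetical sketch, not the repository's implementation: invoke Claude 3
# Sonnet on Amazon Bedrock with several base64-encoded images plus a system
# prompt, and return the ingredients the model lists.
import json

import boto3

bedrock = boto3.client("bedrock-runtime")

# Illustrative system prompt; the real one is defined in the backend.
SYSTEM_PROMPT = (
    "You are a food-recognition assistant. List only the food items visible "
    "in the images, one ingredient per line. Ignore packaging, background "
    "and any non-food objects."
)


def extract_ingredients(list_images_base64: list[str]) -> list[str]:
    # Each captured photo becomes one image content block; a final text block
    # asks the model to consider all images together.
    content = [
        {
            "type": "image",
            "source": {
                "type": "base64",
                "media_type": "image/jpeg",  # assumption: JPEG uploads
                "data": image_b64,
            },
        }
        for image_b64 in list_images_base64
    ]
    content.append(
        {"type": "text", "text": "Which ingredients do you see across these images?"}
    )

    body = {
        "anthropic_version": "bedrock-2023-05-31",
        "max_tokens": 512,
        "system": SYSTEM_PROMPT,
        "messages": [{"role": "user", "content": content}],
    }

    response = bedrock.invoke_model(
        modelId="anthropic.claude-3-sonnet-20240229-v1:0",
        body=json.dumps(body),
    )
    completion = json.loads(response["body"].read())
    text = completion["content"][0]["text"]
    # One ingredient per line, as requested in the system prompt.
    return [line.strip() for line in text.splitlines() if line.strip()]
```

Sending all photos as image blocks in a single user message lets the model deduplicate ingredients that appear in more than one photo, which is why the output is described as a list of ingredients "across all captured images" rather than one list per image.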