src/code.gs (139 additions & 3 deletions)
@@ -33,6 +33,9 @@ const GenAIApp = (function () {
  const globalMetadata = {};
  const addedVectorStores = {};

+  const modelForVision = "gemini-3-pro-preview";
+  let promptForVision = "Describe the images, transcribe any visible text, and summarize the visual context.";
+
  const MAX_FILE_SIZE = 20 * 1024 * 1024; // 20MB in bytes

  /**
@@ -112,12 +115,32 @@ const GenAIApp = (function () {
      const response = UrlFetchApp.fetch(imageInput);
      const blob = response.getBlob();
      const base64Image = Utilities.base64Encode(blob.getBytes());
+      let mimeType = blob.getContentType();
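+      // Fall back to the URL's file extension when the blob does not report an image MIME type.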
+      if (!mimeType || !mimeType.startsWith("image/")) {
+        let pathname;
+        try {
+          pathname = new URL(imageInput).pathname.toLowerCase();
+        } catch {
+          pathname = imageInput.split("?")[0].split("#")[0].toLowerCase();
+        }
+        if (pathname.endsWith(".png")) {
+          mimeType = "image/png";
+        } else if (pathname.endsWith(".jpg") || pathname.endsWith(".jpeg")) {
+          mimeType = "image/jpeg";
+        } else if (pathname.endsWith(".webp")) {
+          mimeType = "image/webp";
+        } else if (pathname.endsWith(".gif")) {
+          mimeType = "image/gif";
+        } else {
+          throw new Error("Failed to identify a valid image MIME type. Please check the file format for Gemini.");
+        }
+      }
      contents.push({
        role: "user",
        parts: [
          {
-            inline_data: {
-              mime_type: blob.getContentType(),
+            inlineData: {
+              mime_type: mimeType,
              data: base64Image
            }
          }
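After this hunk, a fetched image lands in the conversation under the camelCase inlineData key with the sniffed MIME type. A sketch of the resulting part for a PNG (the Gemini REST API accepts both camelCase and snake_case field names, so the mixed inlineData/mime_type spelling above still parses):

const examplePart = {
  role: "user",
  parts: [{
    inlineData: {
      mime_type: "image/png",
      data: "<base64-encoded image bytes>"
    }
  }]
};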
@@ -195,7 +218,7 @@ const GenAIApp = (function () {
      contents.push({
        role: 'user',
        parts: [{
-          inline_data: {
+          inlineData: {
            mime_type: fileInfo.mimeType,
            data: blobToBase64
          }
@@ -422,6 +445,13 @@ const GenAIApp = (function () {
        knowledgeLink = [];
      }

+      // Gemini does not support using images together with vector stores (RAG) yet.
+      // Images must be analyzed first and replaced with text before RAG processing.
+      const ragCorpusIds = Object.keys(addedVectorStores);
+      if (ragCorpusIds.length > 0 && model.includes("gemini") && gcpProjectId) {
+        contents = this._convertImagesToText(contents);
+      }
+
      let payload;
      if (model.includes("gemini")) {
        payload = this._buildGeminiPayload(advancedParametersObject);
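For orientation, the new guard fires only when all three conditions are truthy. A sketch of internal state that would trigger the conversion (the identifiers are the module-level variables used above; the values are invented for illustration):

// Invented example state; real values come from the library's setters.
addedVectorStores = { "my-corpus": "projects/my-project/locations/global/ragCorpora/123" };
model = "gemini-3-pro-preview";  // any name containing "gemini" passes the includes() check
gcpProjectId = "my-gcp-project"; // a Vertex AI project must be configured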
@@ -737,6 +767,98 @@ const GenAIApp = (function () {
      return payload;
    }

+    /**
+     * Replaces all image parts in a Gemini conversation with a text description
+     * generated by Gemini 3 Pro Preview via Vertex AI.
+     *
+     * - Detects images (inlineData / fileData) across all messages
+     * - Sends them to Gemini Vision for analysis
+     * - Removes images from the conversation
+     * - Appends a new message containing the image analysis
+     *
+     * @param {Array<Object>} currentContents
+     *   Gemini conversation contents.
+     *
+     * @returns {Array<Object>}
+     *   Updated contents with images removed and a text analysis appended.
+     */
+    this._convertImagesToText = function (currentContents) {
+      if (!currentContents || currentContents.length === 0) return currentContents;
+
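+      // Nothing to convert unless at least one message carries an image part.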
+      const hasImages = currentContents.some(c => {
+        const parts = Array.isArray(c.parts) ? c.parts : (c.parts ? [c.parts] : []);
+        return parts.some(p => p.inlineData || p.fileData);
+      });
+
+      if (!hasImages) return currentContents;
+
+      if (verbose) {
+        console.log("[GenAIApp] - Images detected. Converting to text description...");
+      }
+
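+      // Gather every image part across the conversation so one Vision call can describe them all.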
+      const imageParts = currentContents.flatMap(c => {
+        const parts = Array.isArray(c.parts) ? c.parts : (c.parts ? [c.parts] : []);
+        return parts.filter(p => p.inlineData || p.fileData);
+      });
+
+      const descriptionPayload = {
+        contents: [{
+          role: "user",
+          parts: [
+            ...imageParts,
+            { text: promptForVision }
+          ]
+        }],
+        generationConfig: {
+          temperature: 0.2,
+          maxOutputTokens: 2000
+        }
+      };
+
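+      // Auth uses the script's OAuth token; the Apps Script manifest must grant the
+      // https://www.googleapis.com/auth/cloud-platform scope for Vertex AI calls.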
+      const options = {
+        method: 'post',
+        contentType: 'application/json',
+        headers: {
+          'Authorization': 'Bearer ' + ScriptApp.getOAuthToken()
+        },
+        payload: JSON.stringify(descriptionPayload),
+        muteHttpExceptions: true
+      };
+
+      const endpoint = `https://aiplatform.googleapis.com/v1/projects/${gcpProjectId}/locations/global/publishers/google/models/${modelForVision}:generateContent`;
+      let description = "Image analysis returned no text.";
+      try {
+        const response = UrlFetchApp.fetch(endpoint, options);
+        const result = JSON.parse(response.getContentText());
+
+        if (result?.candidates?.[0]?.content?.parts?.[0]?.text) {
+          description = result.candidates[0].content.parts[0].text;
+        } else if (result?.parts?.[0]?.text) {
+          description = result.parts[0].text;
+        }
+      } catch (error) {
+        Logger.log(`[GenAIApp] - Image analysis failed during Gemini Vision preprocessing: ${error}`);
+      }
+
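+      // Work on a deep copy: strip image parts, then drop any messages left with no parts.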
+      let newContents = JSON.parse(JSON.stringify(currentContents));
+      newContents.forEach(c => {
+        const parts = Array.isArray(c.parts) ? c.parts : (c.parts ? [c.parts] : []);
+        c.parts = parts.filter(p => !p.inlineData && !p.fileData);
+      });
+
+      newContents = newContents.filter(c => {
+        const parts = Array.isArray(c.parts) ? c.parts : (c.parts ? [c.parts] : []);
+        return parts.length > 0;
+      });
+
+      newContents.push({
+        role: "user",
+        parts: [{ text: `IMAGE ANALYSIS:\n${description}` }]
+      });
+
+      return newContents;
+    }
+
    /**
     * Get a blob from a Google Drive file ID
     *
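To make the transformation concrete, here is a sketch of what _convertImagesToText does to a conversation (values abridged; the appended description is whatever the Vision call returned):

// Before the call:
const before = [{
  role: "user",
  parts: [
    { text: "What does this chart show?" },
    { inlineData: { mime_type: "image/png", data: "<base64>" } }
  ]
}];

// After the call: image parts stripped, analysis appended as a final user message.
const after = [
  { role: "user", parts: [{ text: "What does this chart show?" }] },
  { role: "user", parts: [{ text: "IMAGE ANALYSIS:\n<description from Gemini Vision>" }] }
];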
@@ -2254,6 +2376,20 @@ const GenAIApp = (function () {
     */
    setPrivateInstanceBaseUrl: function (baseUrl) {
      privateInstanceBaseUrl = baseUrl;
    },
+
+    /**
+     * Sets the prompt used to describe images when using Gemini with RAG.
+     *
+     * Gemini does not support combining images and vector stores directly.
+     * When RAG is enabled, images are first analyzed and replaced with text
+     * using this prompt before querying the Gemini vector store.
+     *
+     * @param {string} prompt The prompt to use for image description.
+     */
+    setPromptForVision: function (prompt) {
+      promptForVision = prompt;
+    }
+
  }
})();
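Downstream, a script can override the default vision prompt before issuing a RAG-enabled request. A minimal usage sketch (only setPromptForVision is confirmed by this diff; the prompt text is the caller's own):

GenAIApp.setPromptForVision(
  "List every product name visible in the image, then summarize the scene in one sentence."
);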