src/code.gs (139 additions & 3 deletions)
@@ -33,6 +33,9 @@ const GenAIApp = (function () {
  const globalMetadata = {};
  const addedVectorStores = {};

+  const modelForVision = "gemini-3-pro-preview";
+  let promptForVision = "Describe the images, transcribe any visible text, and summarize the visual context.";
+
  const MAX_FILE_SIZE = 20 * 1024 * 1024; // 20MB in bytes

  /**
@@ -112,12 +115,32 @@ const GenAIApp = (function () {
      const response = UrlFetchApp.fetch(imageInput);
      const blob = response.getBlob();
      const base64Image = Utilities.base64Encode(blob.getBytes());
+      let mimeType = blob.getContentType();
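+      // Fall back to the URL's file extension when the blob does not report an image MIME type.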
+      if (!mimeType || !mimeType.startsWith("image/")) {
+        let pathname;
+        try {
+          pathname = new URL(imageInput).pathname.toLowerCase();
+        } catch {
+          pathname = imageInput.split("?")[0].split("#")[0].toLowerCase();
+        }
+        if (pathname.endsWith(".png")) {
+          mimeType = "image/png";
+        } else if (pathname.endsWith(".jpg") || pathname.endsWith(".jpeg")) {
+          mimeType = "image/jpeg";
+        } else if (pathname.endsWith(".webp")) {
+          mimeType = "image/webp";
+        } else if (pathname.endsWith(".gif")) {
+          mimeType = "image/gif";
+        } else {
+          throw new Error("Failed to identify a valid image MIME type. Please check the file format for Gemini.");
+        }
+      }
      contents.push({
        role: "user",
        parts: [
          {
-            inline_data: {
-              mime_type: blob.getContentType(),
+            inlineData: {
+              mime_type: mimeType,
              data: base64Image
            }
          }
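After this hunk, a fetched image lands in the conversation under the camelCase inlineData key with the sniffed MIME type. A sketch of the resulting part for a PNG (the Gemini REST API accepts both camelCase and snake_case field names, so the mixed inlineData/mime_type spelling above still parses):

const examplePart = {
  role: "user",
  parts: [{
    inlineData: {
      mime_type: "image/png",
      data: "<base64-encoded image bytes>"
    }
  }]
};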
@@ -195,7 +218,7 @@ const GenAIApp = (function () {
      contents.push({
        role: 'user',
        parts: [{
-          inline_data: {
+          inlineData: {
            mime_type: fileInfo.mimeType,
            data: blobToBase64
          }
@@ -422,6 +445,13 @@ const GenAIApp = (function () {
        knowledgeLink = [];
      }

+      // Gemini does not support using images together with vector stores (RAG) yet.
+      // Images must be analyzed first and replaced with text before RAG processing.
+      const ragCorpusIds = Object.keys(addedVectorStores);
+      if (ragCorpusIds.length > 0 && model.includes("gemini") && gcpProjectId) {
+        contents = this._convertImagesToText(contents);
+      }
+
      let payload;
      if (model.includes("gemini")) {
        payload = this._buildGeminiPayload(advancedParametersObject);
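For orientation, the new guard fires only when all three conditions are truthy. A sketch of internal state that would trigger the conversion (the identifiers are the module-level variables used above; the values are invented for illustration):

// Invented example state; real values come from the library's setters.
addedVectorStores = { "my-corpus": "projects/my-project/locations/global/ragCorpora/123" };
model = "gemini-3-pro-preview";  // any name containing "gemini" passes the includes() check
gcpProjectId = "my-gcp-project"; // a Vertex AI project must be configured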
@@ -737,6 +767,98 @@ const GenAIApp = (function () {
      return payload;
    }

+    /**
+     * Replaces all image parts in a Gemini conversation with a text description
+     * generated by Gemini 3 Pro Preview via Vertex AI.
+     *
+     * - Detects images (inlineData / fileData) across all messages
+     * - Sends them to Gemini Vision for analysis
+     * - Removes images from the conversation
+     * - Appends a new message containing the image analysis
+     *
+     * @param {Array<Object>} currentContents
+     *   Gemini conversation contents.
+     *
+     * @returns {Array<Object>}
+     *   Updated contents with images removed and a text analysis appended.
+     */
+    this._convertImagesToText = function (currentContents) {
+      if (!currentContents || currentContents.length === 0) return currentContents;
+
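+      // Nothing to convert unless at least one message carries an image part.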
+      const hasImages = currentContents.some(c => {
+        const parts = Array.isArray(c.parts) ? c.parts : (c.parts ? [c.parts] : []);
+        return parts.some(p => p.inlineData || p.fileData);
+      });
+
+      if (!hasImages) return currentContents;
+
+      if (verbose) {
+        console.log("[GenAIApp] - Images detected. Converting to text description...");
+      }
+
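+      // Gather every image part across the conversation so one Vision call can describe them all.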
+      const imageParts = currentContents.flatMap(c => {
+        const parts = Array.isArray(c.parts) ? c.parts : (c.parts ? [c.parts] : []);
+        return parts.filter(p => p.inlineData || p.fileData);
+      });
+
+      const descriptionPayload = {
+        contents: [{
+          role: "user",
+          parts: [
+            ...imageParts,
+            { text: promptForVision }
+          ]
+        }],
+        generationConfig: {
+          temperature: 0.2,
+          maxOutputTokens: 2000
+        }
+      };
+
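+      // Auth uses the script's OAuth token; the Apps Script manifest must grant the
+      // https://www.googleapis.com/auth/cloud-platform scope for Vertex AI calls.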
+      const options = {
+        method: 'post',
+        contentType: 'application/json',
+        headers: {
+          'Authorization': 'Bearer ' + ScriptApp.getOAuthToken()
+        },
+        payload: JSON.stringify(descriptionPayload),
+        muteHttpExceptions: true
+      };
+
+      const endpoint = `https://aiplatform.googleapis.com/v1/projects/${gcpProjectId}/locations/global/publishers/google/models/${modelForVision}:generateContent`;
+      let description = "Image analysis returned no text.";
+      try {
+        const response = UrlFetchApp.fetch(endpoint, options);
+        const result = JSON.parse(response.getContentText());
+
+        if (result?.candidates?.[0]?.content?.parts?.[0]?.text) {
+          description = result.candidates[0].content.parts[0].text;
+        } else if (result?.parts?.[0]?.text) {
+          description = result.parts[0].text;
+        }
+      } catch (error) {
+        Logger.log(`[GenAIApp] - Image analysis failed during Gemini Vision preprocessing: ${error}`);
+      }
+
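+      // Work on a deep copy: strip image parts, then drop any messages left with no parts.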
+      let newContents = JSON.parse(JSON.stringify(currentContents));
+      newContents.forEach(c => {
+        const parts = Array.isArray(c.parts) ? c.parts : (c.parts ? [c.parts] : []);
+        c.parts = parts.filter(p => !p.inlineData && !p.fileData);
+      });
+
+      newContents = newContents.filter(c => {
+        const parts = Array.isArray(c.parts) ? c.parts : (c.parts ? [c.parts] : []);
+        return parts.length > 0;
+      });
+
+      newContents.push({
+        role: "user",
+        parts: [{ text: `IMAGE ANALYSIS:\n${description}` }]
+      });
+
+      return newContents;
+    }
+
    /**
     * Get a blob from a Google Drive file ID
     *
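To make the transformation concrete, here is a sketch of what _convertImagesToText does to a conversation (values abridged; the appended description is whatever the Vision call returned):

// Before the call:
const before = [{
  role: "user",
  parts: [
    { text: "What does this chart show?" },
    { inlineData: { mime_type: "image/png", data: "<base64>" } }
  ]
}];

// After the call: image parts stripped, analysis appended as a final user message.
const after = [
  { role: "user", parts: [{ text: "What does this chart show?" }] },
  { role: "user", parts: [{ text: "IMAGE ANALYSIS:\n<description from Gemini Vision>" }] }
];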
@@ -2254,6 +2376,20 @@ const GenAIApp = (function () {
     */
    setPrivateInstanceBaseUrl: function (baseUrl) {
      privateInstanceBaseUrl = baseUrl;
    },
+
+    /**
+     * Sets the prompt used to describe images when using Gemini with RAG.
+     *
+     * Gemini does not support combining images and vector stores directly.
+     * When RAG is enabled, images are first analyzed and replaced with text
+     * using this prompt before querying the Gemini vector store.
+     *
+     * @param {string} prompt The prompt to use for image description.
+     */
+    setPromptForVision: function (prompt) {
+      promptForVision = prompt;
+    }
+
  }
})();
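Downstream, a script can override the default vision prompt before issuing a RAG-enabled request. A minimal usage sketch (only setPromptForVision is confirmed by this diff; the prompt text is the caller's own):

GenAIApp.setPromptForVision(
  "List every product name visible in the image, then summarize the scene in one sentence."
);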