@@ -136,7 +136,8 @@
{"name": "proxy server", "function": "proxy_server", "screenshot": "none" },

{"name": "render jinja template", "function": "render_jinja_template", "screenshot": "none" },
{"name": "download chrome extension", "function": "download_chrome_extension", "screenshot": "none" },
{"name": "download chrome extension", "function": "download_chrome_extension", "screenshot": "none" },
{"name": "AI - LLM prompt with files", "function": "AI_LLM_prompt_with_files", "screenshot": "none" },

) # yapf: disable

127 changes: 127 additions & 0 deletions Framework/Built_In_Automation/Sequential_Actions/common_functions.py
@@ -7107,3 +7107,130 @@ def download_chrome_extension(data_set):
    except Exception:
        return CommonUtil.Exception_Handler(sys.exc_info())


@logger
def AI_LLM_prompt_with_files(data_set):
"""
This action will extract the text from images using OpenAI's vision API. This action also takes user prompt and returns
the result according to the user prompt. If the user does not give any prompt, then by default it
extracts all text from the image and returns the result in JSON format.

Args:
data_set:
------------------------------------------------------------------------------
image | input parameter | %| image.png |%
user prompt | optional parameter | Extract invoice details
AI - LLM prompt with files | common action | AI - LLM prompt with files
------------------------------------------------------------------------------

Return:
`passed` if success
`zeuz_failed` if fails
"""
    sModuleInfo = inspect.currentframe().f_code.co_name + " : " + MODULE_NAME
    global selenium_driver

    try:
        import base64
        import requests
        import json
        import os
        user_image_path = None
        user_prompt = None

        for left, mid, right in data_set:
            left = left.lower().replace(" ", "")
            mid = mid.lower().replace(" ", "")
            right = right.strip()

            if left == 'image':
                if right != '':
                    user_image_path = right

            if left == "userprompt":
                if right != '':
                    user_prompt = right

        # Validate image path
        if not user_image_path:
            CommonUtil.ExecLog(sModuleInfo, "No image path provided. Please provide an image path.", 3)
            return "zeuz_failed"

        image_path = user_image_path
        CommonUtil.ExecLog(sModuleInfo, f"Processing image: {image_path}", 1)

        if not os.path.isfile(image_path):
            CommonUtil.ExecLog(sModuleInfo, f"Image file not found: {image_path}", 3)
            return "zeuz_failed"

        prompt = user_prompt
        if not prompt:
            prompt = "Extract all text from this image and return the result in JSON format."

        # Convert image to base64
        with open(image_path, "rb") as img_file:
            base64_image = base64.b64encode(img_file.read()).decode("utf-8")

        # Load API key from .env file
        try:
            from dotenv import load_dotenv
            framework_dir = os.path.dirname(os.path.dirname(os.path.dirname(__file__)))

Review comment (Member):
I don't think we need to load these ourselves. load_dotenv() is already called when node_cli.py starts. So, if a user specifies an environment variable and starts the node, it'll already be available in os.getenv() calls.
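A minimal sketch of how that simplification could look, assuming the node process has already loaded the .env values and the key is still exposed as OPENAI_API (hypothetical replacement, not part of this diff):

        # Sketch (assumption): node_cli.py has already run load_dotenv(), so the key is in the process environment
        api_key = os.getenv("OPENAI_API")
        if not api_key:
            CommonUtil.ExecLog(sModuleInfo, "OPENAI_API is not set in the environment", 3)
            return "zeuz_failed"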

            env_path = os.path.join(framework_dir, ".env")
            load_dotenv(env_path)
            api_key = os.getenv("OPENAI_API")
            if not api_key:
                CommonUtil.ExecLog(sModuleInfo, "OPENAI_API not found in .env file", 3)
                return "zeuz_failed"
        except Exception as e:
            CommonUtil.ExecLog(sModuleInfo, f"Failed to load API key from .env: {str(e)}", 3)
            return "zeuz_failed"

        # Prepare API request
        headers = {
            "Authorization": f"Bearer {api_key}",
            "Content-Type": "application/json"
        }

        payload = {
            "model": "gpt-4o",

Review comment (Member):
Users should be able to specify the model name as part of the action parameters. They should also be able to configure the temperature and any other associated hyperparameters of the model.
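A rough sketch of how those could be wired in, using hypothetical "model" and "temperature" data set rows with defaults; none of this is in the current diff:

        # Sketch (assumption): optional "model" / "temperature" rows parsed alongside the existing parameters
        model_name = "gpt-4o"    # default when the user supplies no model row
        temperature = 0.0        # default; keeps extraction deterministic
        for left, mid, right in data_set:
            left = left.lower().replace(" ", "")
            right = right.strip()
            if left == "model" and right != "":
                model_name = right
            if left == "temperature" and right != "":
                temperature = float(right)

        payload = {
            "model": model_name,
            "temperature": temperature,
            # "messages": [...] as in the diff below
        }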

"messages": [
{
"role": "user",
"content": [
{
"type": "image_url",
"image_url": {
"url": f"data:image/png;base64,{base64_image}"
}
},
{
"type": "text",
"text": prompt
}
]
}
]
}

# Send Request
CommonUtil.ExecLog(sModuleInfo, "Analyzing image...", 1)
response = requests.post(
"https://api.openai.com/v1/chat/completions",
headers=headers,
data=json.dumps(payload)
)

# === 5. Process Response ===
if response.status_code == 200:
response_data = response.json()
extracted_data = response_data["choices"][0]["message"]["content"]
CommonUtil.ExecLog(sModuleInfo, f"Text extracted successfully from: {image_path}", 1)
CommonUtil.ExecLog(sModuleInfo, f"Extracted content: {extracted_data}", 5)
return "passed"
else:
CommonUtil.ExecLog(sModuleInfo, f"OpenAI API error: {response.status_code} - {response.text}", 3)
return "zeuz_failed"

except Exception:
return CommonUtil.Exception_Handler(sys.exc_info())
