Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
25 commits
Select commit Hold shift + click to select a range
5078a86
Add validate_output to all benchmarks and integrate into regression t…
Copilot Apr 15, 2026
7df4d28
Improve PUT /cart validation to check for empty dict response
Copilot Apr 15, 2026
771ef1c
Add --validate/--no-validate option to benchmark invoke CLI command
Copilot Apr 16, 2026
7372693
Address reviewer comments: improve benchmark output validation
Copilot Apr 16, 2026
58d3ad4
Fix PEP 8 style: split multi-line imports in validators
Copilot Apr 16, 2026
e82663e
[benchmarks] Validation for 110.dynamic-html
mcopik Apr 19, 2026
7de6ffb
[system] Simplify errors
mcopik Apr 19, 2026
2d33787
[system] Remove unnecessary redaction from default runs
mcopik Apr 19, 2026
6fbb81a
[benchmarks] Improved validation for 110.dynamic-html
mcopik Apr 19, 2026
8389386
[benchmarks] Validation for 120.uploader
mcopik Apr 19, 2026
7b46d52
[system] Improved validation of benchmark results
mcopik Apr 19, 2026
7efa207
[benchmarks] Add validation of 210.thumbnailer
mcopik Apr 19, 2026
c6c3897
[benchmarks] Add proper inputs to 220.video-processing and add valida…
mcopik Apr 19, 2026
8cf7bb3
[benchmarks] Fixed to 210.thumbnailer validation
mcopik Apr 19, 2026
30ba620
[benchmarks] Validation of benchmark 311
mcopik Apr 19, 2026
070c6bb
[benchmarks] Add verification of 504 benchmark
mcopik Apr 20, 2026
d488cbe
[benchmarks] Add validation of 130
mcopik Apr 20, 2026
31e7cc9
[benchmarks] Add validation of 411
mcopik Apr 20, 2026
ce38189
[system] Bump version
mcopik Apr 20, 2026
8e0f0bf
[system] Standardize benchmark validation interface
mcopik Apr 20, 2026
96ab08b
[benchmarks] Add validation for 501
mcopik Apr 20, 2026
c556bd9
[benchmarks] Validate 503
mcopik Apr 20, 2026
2d02447
[benchmarks] Major change: standardize output format of benchmarks be…
mcopik Apr 20, 2026
b2eceb4
[benchmarks] Major change: 502 will now return the full tree
mcopik Apr 20, 2026
2cb37d7
[benchmarks] Validate 502
mcopik Apr 20, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions benchmarks/000.microbenchmarks/010.sleep/input.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,3 +11,10 @@ def buckets_count():

def generate_input(data_dir, size, benchmarks_bucket, input_paths, output_paths, upload_func, nosql_func):
    """Produce the benchmark payload: the sleep duration for the given size."""
    duration = size_generators[size]
    return {'sleep': duration}

def validate_output(input_config: dict, output: dict, language: str, storage = None) -> str | None:

if output.get('result') != input_config.get('sleep'):
return f"Expected sleep duration {input_config.get('sleep')} but got {output.get('result')}"

return None
21 changes: 21 additions & 0 deletions benchmarks/100.webapps/110.dynamic-html/input.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,3 +10,24 @@ def generate_input(data_dir, size, benchmarks_bucket, input_paths, output_paths,
input_config = {'username': 'testname'}
input_config['random_len'] = size_generators[size]
return input_config

def validate_output(input_config: dict, output: dict, language: str, storage = None) -> str | None:

result = output.get('result', '')
username = input_config.get('username', '')
random_len = input_config.get('random_len', 0)

if not isinstance(result, str) or len(result) == 0:
return f"Output is not a non-empty string (type={type(result).__name__}, len={len(result) if isinstance(result, str) else 'N/A'})"

if f'Welcome {username}!' not in result:
return f"Missing expected username greeting 'Welcome {username}!' in HTML output"

if 'Data generated at:' not in result:
return "Missing expected timestamp text 'Data generated at:' in HTML output"

actual_li_count = result.count('<li>')
if actual_li_count != random_len:
return f"Expected {random_len} list items but found {actual_li_count} <li> tags in HTML output"

return None
2 changes: 2 additions & 0 deletions benchmarks/100.webapps/120.uploader/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,3 +7,5 @@
## Description

The benchmark implements the common workflow of uploading user-defined data to the persistent cloud storage. It accepts a URL, downloads file contents, and uploads them to the storage. Python implementation uses the standard library `requests`, while the Node.js version uses the third-party `requests` library installed with `npm`.

While 128 MB of memory is technically sufficient, a larger memory allocation or a longer timeout might be required for the `large` input size.
2 changes: 1 addition & 1 deletion benchmarks/100.webapps/120.uploader/config.json
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
{
"timeout": 30,
"timeout": 60,
"memory": 128,
"languages": ["python", "nodejs"],
"modules": ["storage"]
Expand Down
56 changes: 55 additions & 1 deletion benchmarks/100.webapps/120.uploader/input.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,24 @@
# Copyright 2020-2025 ETH Zurich and the SeBS authors. All rights reserved.
import hashlib
import os
import tempfile

url_generators = {
# source: mlperf fake_imagenet.sh. 230 kB
'test' : 'https://upload.wikimedia.org/wikipedia/commons/thumb/e/e7/Jammlich_crop.jpg/800px-Jammlich_crop.jpg',
# video: HPX source code, 6.7 MB
'small': 'https://github.com/STEllAR-GROUP/hpx/archive/refs/tags/1.4.0.zip',
# resnet model from pytorch. 98M
'large': 'https://download.pytorch.org/models/resnet50-19c8e357.pth'
'large': 'https://download.pytorch.org/models/resnet50-19c8e357.pth'
}

# MD5 checksums for stable reference objects.
# These are computed from the objects at the URLs above and must be updated
# if the remote files change.
expected_checksums = {
'test': '91799b8ca818598fc5b8790f3b338150',
'small': 'baf7ea99128aa3e5c2d0c8b8f61cce1b',
'large': '9e9c86b324d80e65229fab49b8d9a8e8'
}

def buckets_count():
Expand All @@ -15,6 +27,48 @@ def buckets_count():
def generate_input(data_dir, size, benchmarks_bucket, input_buckets, output_buckets, upload_func, nosql_func):
    """Build the uploader input: the URL to fetch plus target bucket info."""
    return {
        'object': {'url': url_generators[size], 'size': size},
        'bucket': {'bucket': benchmarks_bucket, 'output': output_buckets[0]},
    }

def validate_output(input_config: dict, output: dict, language: str, storage = None) -> str | None:

result = output.get('result', {})
key = result.get('key', '')
url = input_config.get('object', {}).get('url', '')
size = input_config.get('object', {}).get('size', '')

if not isinstance(key, str) or len(key) == 0:
return f"Output key is missing or invalid (type={type(key).__name__}, value='{key}')"

if result.get('url') != url:
return f"Output URL mismatch: expected '{url}' but got '{result.get('url')}'"

if storage is None:
return None

bucket = input_config.get('bucket', {}).get('bucket', '')
expected_name = os.path.basename(url)
# Storage client adds unique hash: filename.{hash}.ext
# Check that the key contains the base filename and has the same extension
expected_base, expected_ext = os.path.splitext(expected_name)
key_base = os.path.basename(key)
if not (expected_base in key_base and key_base.endswith(expected_ext)):
return f"Storage key '{key_base}' does not match expected pattern (base='{expected_base}', ext='{expected_ext}')"

with tempfile.NamedTemporaryFile(delete=False) as f:
tmp_path = f.name
try:
storage.download(bucket, key, tmp_path)
file_size = os.path.getsize(tmp_path)
if file_size == 0:
return f"Downloaded file from storage is empty (bucket='{bucket}', key='{key}')"

with open(tmp_path, 'rb') as f:
actual_md5 = hashlib.md5(f.read()).hexdigest()
if actual_md5 != expected_checksums[size]:
return f"MD5 checksum mismatch for size '{size}': expected '{expected_checksums[size]}' but got '{actual_md5}'"
finally:
os.unlink(tmp_path)
return None
87 changes: 87 additions & 0 deletions benchmarks/100.webapps/130.crud-api/input.py
Original file line number Diff line number Diff line change
Expand Up @@ -93,5 +93,92 @@ def generate_input(
)

input_config["requests"] = requests
input_config["size"] = size

return input_config

def validate_output(input_config: dict, output: dict, language: str, storage = None) -> str | None:

results = output.get('result', [])
requests = input_config.get('requests', [])

if not isinstance(results, list):
return f"Output results is not a list (type={type(results).__name__})"

if len(results) != len(requests):
return f"Results count mismatch: expected {len(requests)} responses but got {len(results)}"

if len(results) == 1:
"""
test input -> one result for a single cart item
small input -> one result for entire cart
"""
request = requests[0]
result = results[0]
route = request.get('route')

if route == 'GET /cart/{id}':

expected_item = {"name": "Gothic Game", "price": 42, "quantity": 2}
expected_item["cart_id"] = request["body"]["cart"]
expected_item["product_id"] = request["path"]["id"]

if expected_item != result:
return f"Wrong item details for GET /cart/{{id}}: expected {expected_item} but got {result}"

elif route == 'GET /cart':

products = [
("Gothic Game", 42, 2),
("Gothic 2", 142, 3),
("SeBS Benchmark", 1000, 1),
("Mint Linux", 0, 5)
]
total_cost = sum([p[1] * p[2] for p in products])
items = sum([p[2] for p in products])
cart = [p[0] for p in products]

if sorted(cart) != sorted(result['products']):
return f"Wrong product details for GET /cart: expected {cart} but got {result['products']}"

if total_cost != result['total_cost']:
return f"Wrong product details for GET /cart: expected {total_cost}, but got {result['total_cost']}"

if abs(total_cost / items - result['avg_price']) > 1e-6:
return f"Wrong product details for GET /cart: expected {total_cost/items}, but got {result['products']}"
else:
return f"Unexpected route in single-result output: expected 'GET /cart/{{id}}' or 'GET /cart' but got '{route}'"
else:
"""
large input -> 10 responses,
"""

put_results = results[0::2]
get_results = results[1::2]

for put_result in put_results:
if put_result != {}:
return f"PUT /cart expected empty dict {{}} but got {put_result}"

current_cost = 0
current_quantity = 0
items = []
for idx, get_result in enumerate(get_results):

items.append(f"Test Item {idx}")
current_cost += 100 * idx * idx
current_quantity += idx

if get_result['products'] != items:
return f"Wrong product details for GET /cart: expected {items} but got {get_result['products']}"

if current_cost != get_result['total_cost']:
return f"Wrong product details for GET /cart: expected {current_cost}, but got {get_result['total_cost']}"

if current_quantity == 0:
continue

if abs(current_cost / current_quantity - get_result['avg_price']) > 1e-6:
return f"Wrong product details for GET /cart: expected {current_cost/current_quantity}, but got {get_result['products']}"

return None
2 changes: 1 addition & 1 deletion benchmarks/100.webapps/130.crud-api/python/function.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ def query_products(cart_id: str):
for product in res:

products.append(product["name"])
price_sum += product["price"]
price_sum += product["price"] * product["quantity"]
quantity_sum += product["quantity"]

avg_price = price_sum / quantity_sum if quantity_sum > 0 else 0.0
Expand Down
44 changes: 43 additions & 1 deletion benchmarks/200.multimedia/210.thumbnailer/input.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,9 @@
# Copyright 2020-2025 ETH Zurich and the SeBS authors. All rights reserved.
import glob, os
import glob
import os
import tempfile

from PIL import Image

def buckets_count():
return (1, 1)
Expand Down Expand Up @@ -28,3 +32,41 @@ def generate_input(data_dir, size, benchmarks_bucket, input_paths, output_paths,
input_config['bucket']['input'] = input_paths[0]
input_config['bucket']['output'] = output_paths[0]
return input_config

def validate_output(input_config: dict, output: dict, language: str, storage = None) -> str | None:

result = output.get('result', {})
key = result.get('key', '')

if not isinstance(key, str) or len(key) == 0:
return f"Output key is missing or invalid (type={type(key).__name__}, value='{key}')"

if storage is None:
return None

bucket = input_config.get('bucket', {}).get('bucket', '')
max_width = input_config.get('object', {}).get('width', 0)
max_height = input_config.get('object', {}).get('height', 0)

with tempfile.NamedTemporaryFile(suffix='.jpg', delete=False) as f:
tmp_path = f.name
try:
storage.download(bucket, key, tmp_path)
file_size = os.path.getsize(tmp_path)
if file_size == 0:
return f"Downloaded thumbnail from storage is empty (bucket='{bucket}', key='{key}')"

try:
with Image.open(tmp_path) as img:
w, h = img.size
if w <= 0 or h <= 0:
return f"Thumbnail has invalid dimensions: width={w}, height={h}"
if w > max_width:
return f"Thumbnail width {w} exceeds maximum {max_width}"
if h > max_height:
return f"Thumbnail height {h} exceeds maximum {max_height}"
return None
except Exception as e:
return f"Failed to open or validate thumbnail image: {str(e)}"
finally:
os.unlink(tmp_path)
2 changes: 2 additions & 0 deletions benchmarks/200.multimedia/220.video-processing/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,3 +7,5 @@
## Description

The benchmark implements two operations on video files: adding a watermark and creating a gif. Both input and output media are passed through the cloud storage. To process the video, the benchmark uses `ffmpeg`. The benchmark installs the most recent static binary of `ffmpeg` provided by [John van Sickle](https://johnvansickle.com/ffmpeg/).

While 512 MB is technically sufficient and works well for the watermark operations, the `large` input converts the video into a gif which is more computationally intensive. On AWS, you should expect around ~30 seconds runtime on 1024 MiB allocation.
72 changes: 66 additions & 6 deletions benchmarks/200.multimedia/220.video-processing/input.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,22 @@
# Copyright 2020-2025 ETH Zurich and the SeBS authors. All rights reserved.
import glob, os
import glob
import os
import tempfile
import hashlib

def buckets_count():
return (1, 1)

# MD5 checksums for video processing output (operation, duration) -> hash
# These checksums ensure ffmpeg produces deterministic output
expected_checksums = {
('watermark', 1): '87f3a1ef9d90f93fd24c19ad0209a913',
('watermark', 3): '98286aa95fdbd7501b2cf244027c0ca2',
('extract-gif', 2): '20c17009382df93f6fcbf7ba1c53def0'
}

'''
Generate test, small and large workload for thumbnailer.
Generate test, small and large workload for video processing.

:param data_dir: directory where benchmark data is placed
:param size: workload size
Expand All @@ -17,12 +28,61 @@ def generate_input(data_dir, size, benchmarks_bucket, input_paths, output_paths,
for file in glob.glob(os.path.join(data_dir, '*.mp4')):
img = os.path.relpath(file, data_dir)
upload_func(0, img, file)
#TODO: multiple datasets

# Different operations for different sizes to test various video processing modes
# Note: extract-gif can timeout on some configurations (long and heavy)
size_configs = {
'test': {'op': 'watermark', 'duration': 1},
'small': {'op': 'watermark', 'duration': 3},
'large': {'op': 'extract-gif', 'duration': 2},
}

config = size_configs.get(size, size_configs['test'])

input_config = {'object': {}, 'bucket': {}}
input_config['object']['key'] = img
input_config['object']['op'] = 'watermark'
input_config['object']['duration'] = 1
input_config['object']['key'] = "city.mp4"
input_config['object']['op'] = config['op']
input_config['object']['duration'] = config['duration']
input_config['bucket']['bucket'] = benchmarks_bucket
input_config['bucket']['input'] = input_paths[0]
input_config['bucket']['output'] = output_paths[0]
return input_config

def validate_output(input_config: dict, output: dict, language: str, storage = None) -> str | None:

result = output.get('result', {})
key = result.get('key', '')

if not isinstance(key, str) or len(key) == 0:
return f"Output key is missing or invalid (type={type(key).__name__}, value='{key}')"

if storage is None:
return None

bucket = input_config.get('bucket', {}).get('bucket', '')
op = input_config.get('object', {}).get('op', '')
duration = input_config.get('object', {}).get('duration', 0)

suffix = os.path.splitext(key)[1] or '.tmp'
with tempfile.NamedTemporaryFile(suffix=suffix, delete=False) as f:
tmp_path = f.name
try:
storage.download(bucket, key, tmp_path)
file_size = os.path.getsize(tmp_path)
if file_size == 0:
return f"Downloaded video output from storage is empty (bucket='{bucket}', key='{key}')"

# Check MD5 checksum if available for this operation
checksum_key = (op, duration)
if checksum_key not in expected_checksums:
return f"Missing validation configuration for ({op}, {duration})!"

with open(tmp_path, 'rb') as f:
actual_md5 = hashlib.md5(f.read()).hexdigest()
expected_md5 = expected_checksums[checksum_key]
if actual_md5 != expected_md5:
return f"MD5 checksum mismatch for op='{op}' duration={duration}: expected '{expected_md5}' but got '{actual_md5}'"

return None
finally:
os.unlink(tmp_path)
Loading
Loading