lm-sys · michaelzhiluo · Feb 6, 2025 · Feb 6, 2025 · Feb 6, 2025
diff --git a/fastchat/serve/monitor/monitor.py b/fastchat/serve/monitor/monitor.py
@@ -1050,6 +1050,21 @@ def build_leaderboard_tab(
                     "Please configure it to a valid URL."
                 )
 
+            from fastchat.serve.monitor.video_arena import (
+                build_video_arena_tab,
+                VIDEO_ARENA_LEADERBOARD_URL,
+            )
+
+            if VIDEO_ARENA_LEADERBOARD_URL:
+                with gr.Tab("🎥 Text-to-Video", id=6):
+                    build_video_arena_tab()
+            else:
+                print(
+                    "Unable to build Video Arena's Leaderboard. "
+                    "VIDEO_ARENA_LEADERBOARD_URL environment variable is not set. "
+                    "Please configure it to a valid URL."
+                )
+
         if not show_plot:
             gr.Markdown(
                 """ ## Visit our [HF space](https://huggingface.co/spaces/lmsys/chatbot-arena-leaderboard) for more analysis!

diff --git a/fastchat/serve/monitor/video_arena.py b/fastchat/serve/monitor/video_arena.py
@@ -0,0 +1,292 @@
+"""Video Arena module for displaying video model leaderboard and rankings.
+
+This module provides functionality to fetch, process and display leaderboard data
+from the Video Arena API. It includes model information, score processing, and
+Gradio UI components for visualization.
+
+Classes:
+    ModelVersion: Data class for storing model version information
+    ModelInfo: Data class for storing model class information
+
+Functions:
+    process_video_arena_leaderboard: Process raw leaderboard data into DataFrame
+    build_video_arena_tab: Build and display the Video Arena leaderboard UI
+"""
+
+import os
+from dataclasses import dataclass
+import requests
+import pandas as pd
+import gradio as gr
+
+from fastchat.serve.monitor.monitor import recompute_final_ranking
+
+# URL for fetching Video Arena leaderboard data
+VIDEO_ARENA_LEADERBOARD_URL = os.getenv(
+    "VIDEO_ARENA_LEADERBOARD_URL", "https://www.videoarena.tv/api/v1/leaderboard"
+)
+
+
+@dataclass
+class ModelVersion:
+    """Data class to store model version information.
+
+    Attributes:
+        license: The license type of the model version
+        website: The website URL for the model version
+    """
+
+    license: str
+    website: str
+
+
+@dataclass
+class ModelInfo:
+    """Data class to store model class information.
+
+    Attributes:
+        official_name: The official display name of the model
+        organization: The organization that created the model
+        versions: Dictionary mapping version names to ModelVersion objects
+    """
+
+    official_name: str
+    organization: str
+    versions: dict[str, ModelVersion] = None
+
+
+# Mapping of model identifiers to their information
+VIDEO_MODEL_INFO = {
+    "veo": ModelInfo(
+        "Veo",
+        "Google",
+        versions={
+            "2.0": ModelVersion(
+                "Proprietary", "https://deepmind.google/technologies/veo/veo-2/"
+            )
+        },
+    ),
+    "minimax": ModelInfo(
+        "Minimax",
+        "Hailuo",
+        versions={"01": ModelVersion("Proprietary", "https://hailuoai.video/")},
+    ),
+    "kling": ModelInfo(
+        "Kling",
+        "Kuaishou",
+        versions={
+            "1.0": ModelVersion("Proprietary", "https://klingai.com/"),
+            "1.5": ModelVersion("Proprietary", "https://klingai.com/"),
+        },
+    ),
+    "sora": ModelInfo(
+        "Sora",
+        "OpenAI",
+        versions={"1": ModelVersion("Proprietary", "https://openai.com/sora/")},
+    ),
+    "luma": ModelInfo(
+        "Luma",
+        "LumaLabs",
+        versions={
+            "1.6": ModelVersion("Proprietary", "https://lumalabs.ai/dream-machine")
+        },
+    ),
+    "runway": ModelInfo(
+        "Runway",
+        "Runway",
+        versions={
+            "default": ModelVersion(
+                "Proprietary", "https://runwayml.com/research/introducing-gen-3-alpha"
+            )
+        },
+    ),
+    "genmo": ModelInfo(
+        "Genmo",
+        "Genmo",
+        versions={
+            "0.2": ModelVersion("Proprietary", "https://www.genmo.ai/"),
+            "Mochi-1": ModelVersion("Apache-2.0", "https://www.genmo.ai/"),
+        },
+    ),
+    "svd": ModelInfo(
+        "SVD",
+        "StabilityAI",
+        versions={
+            "1.0": ModelVersion("Proprietary", "https://stability.ai/stable-video")
+        },
+    ),
+    "opensora": ModelInfo(
+        "OpenSora",
+        "OpenSora",
+        versions={
+            "1.2": ModelVersion("Apache-2.0", "https://github.com/hpcaitech/Open-Sora")
+        },
+    ),
+    "pika": ModelInfo(
+        "Pika",
+        "PikaLabs",
+        versions={
+            "β": ModelVersion("Proprietary", "pika.art"),
+            "1.5": ModelVersion("Proprietary", "pika.art"),
+        },
+    ),
+}
+
+
+def process_video_arena_leaderboard(data):
+    """Process raw leaderboard data into a formatted DataFrame.
+
+    Args:
+        data: Raw leaderboard data from the Video Arena API containing model scores
+             and metadata
+
+    Returns:
+        pd.DataFrame: Processed leaderboard with rankings and confidence intervals.
+                     Contains columns for rank, model name, scores, confidence intervals,
+                     vote counts, organization and license information.
+    """
+    leaderboard = []
+    for item in data:
+        model_name = item["model"].lower()
+        version = item["version"]
+
+        # Skip veo model
+        if model_name == "veo":
+            continue
+
+        # Get model info from mapping
+        model_info = VIDEO_MODEL_INFO.get(
+            model_name,
+            ModelInfo(
+                model_name, "Unknown", {"default": ModelVersion("Proprietary", "")}
+            ),
+        )
+
+        # Determine license and website based on version
+        version_key = version if version in model_info.versions else "default"
+        if version_key not in model_info.versions:
+            model_info.versions["default"] = ModelVersion("Proprietary", "")
+        model_version = model_info.versions[version_key]
+        license_type, website = model_version.license, model_version.website
+
+        # Replace spaces with dashes in the display name
+        display_name = f"{model_info.official_name} {version}".replace(" ", "-")
+        model_data = {
+            "name": f"[{display_name}]({website})" if website else display_name,
+            "visibility": "public",
+            "score": round(item["scores"]["elo"]),
+            "lower": item["scores"]["elo"] - item["scores"]["ci_lower"],
+            "upper": item["scores"]["elo"] + item["scores"]["ci_upper"],
+            "votes": (
+                item["scores"]["win"]["total"]
+                + item["scores"]["loss"]["total"]
+                + item["scores"]["tie"]["total"]
+            ),
+            "organization": model_info.organization,
+            "license": license_type,
+        }
+        leaderboard.append(model_data)
+
+    leaderboard = pd.DataFrame(leaderboard)
+
+    # Calculate confidence intervals
+    leaderboard["rating_q975"] = leaderboard["upper"].round().astype(int)
+    leaderboard["rating_q025"] = leaderboard["lower"].round().astype(int)
+
+    leaderboard["upper_diff"] = leaderboard["upper"] - leaderboard["score"]
+    leaderboard["lower_diff"] = leaderboard["score"] - leaderboard["lower"]
+
+    # Round the differences to integers
+    leaderboard["upper_diff"] = leaderboard["upper_diff"].round().astype(int)
+    leaderboard["lower_diff"] = leaderboard["lower_diff"].round().astype(int)
+
+    leaderboard["confidence_interval"] = (
+        "+"
+        + leaderboard["upper_diff"].astype(str)
+        + "/-"
+        + leaderboard["lower_diff"].astype(str)
+    )
+
+    # Calculate rankings using the existing function
+    rankings_ub = recompute_final_ranking(leaderboard)
+    leaderboard.insert(loc=0, column="Rank* (UB)", value=rankings_ub)
+
+    # Sort the leaderboard
+    leaderboard = leaderboard.sort_values(
+        by=["Rank* (UB)", "score"], ascending=[True, False]
+    )
+
+    return leaderboard
+
+
+def build_video_arena_tab():
+    """Build and display the Video Arena leaderboard tab in the Gradio interface.
+
+    Fetches data from the Video Arena API, processes it into a formatted DataFrame,
+    and creates a formatted display using Gradio components including:
+    - Summary statistics (number of models and battles)
+    - Interactive leaderboard table
+    - Explanatory text for ranking methodology
+
+    Returns:
+        None. Creates and displays Gradio UI components directly.
+    """
+    response = requests.get(VIDEO_ARENA_LEADERBOARD_URL)
+    if response.status_code == 200:
+        leaderboard = process_video_arena_leaderboard(response.json())
+        leaderboard = leaderboard.rename(
+            columns={
+                "name": "Model",
+                "confidence_interval": "95% CI",
+                "score": "Arena Score",
+                "organization": "Organization",
+                "votes": "Votes",
+                "license": "License",
+            }
+        )
+
+        column_order = [
+            "Rank* (UB)",
+            "Model",
+            "Arena Score",
+            "95% CI",
+            "Votes",
+            "Organization",
+            "License",
+        ]
+        leaderboard = leaderboard[column_order]
+        num_models = len(leaderboard)
+        total_battles = int(leaderboard["Votes"].sum()) // 2
+
+        md = f"""
+        [VideoArena](https://www.videoarena.tv/) is a free AI video service allowing users to access, compare, and rank
+        text-to-video capabilities of state-of-the-art generative models. This
+        leaderboard contains the relative performance and ranking of {num_models}
+        models over {total_battles} battles.
+        """
+
+        gr.Markdown(md, elem_id="leaderboard_markdown")
+        gr.DataFrame(
+            leaderboard,
+            datatype=["number", "markdown", "number", "str", "number", "str", "str"],
+            elem_id="video_arena_leaderboard",
+            height=600,
+            wrap=True,
+            interactive=False,
+            column_widths=[70, 130, 60, 80, 50, 80, 70],
+        )
+
+        gr.Markdown(
+            """
+    ***Rank (UB)**: model's ranking (upper-bound), defined by one + the number of models
+    that are statistically better than the target model.
+    Model A is statistically better than model B when A's lower-bound score is greater
+    than B's upper-bound score (in 95% confidence interval). \n
+    **Confidence Interval**: represents the range of uncertainty around the Arena Score.
+    It's displayed as +X / -Y, where X is the difference between the upper bound and
+    the score, and Y is the difference between the score and the lower bound.
+    """,
+            elem_id="leaderboard_markdown",
+        )
+    else:
+        gr.Markdown("Error with fetching Video Arena data. Check back in later.")