InternScience · JayKuo1 · Jan 26, 2026 · Jan 26, 2026 · gemini-code-assist · Jan 26, 2026
diff --git a/.gitignore b/.gitignore
@@ -210,4 +210,7 @@ GPT4o_MINI/
 .DS_Store
 tools/*
 tmp_*/*
-vlmeval/dataset/SciCode/eval/data/*.h5
+vlmeval/dataset/SciCode/eval/data/*.h5
+
+# EarthLink file
+variables_embedding.jsonl
diff --git a/agent_runner.py b/agent_runner.py
@@ -7,8 +7,8 @@
 
 from tqdm import tqdm
 
+from scieval.agents import create_agent, get_available_agents
 from scieval.agents.records import EvalRecord, TrajectoryStore
-from scieval.agents.smolagents import SmolAgentsAgent
 from scieval.dataset import build_dataset
 from scieval.smp import dump, get_logger, load, timestr, githash, ls
 
@@ -32,11 +32,38 @@ def _build_dataset_from_config(cfg: Dict[str, Any], dataset_name: str):
 
 
 def _build_agent_from_config(cfg: Dict[str, Any], agent_name: str):
+    """
+    Instantiate the agent described by ``cfg[agent_name]``.
+
+    The entry is deep-copied, its ``"class"`` key (default
+    ``"SmolAgentsAgent"``) selects the agent type, and every remaining key
+    is forwarded to ``create_agent`` as a keyword argument.
+
+    Returns:
+        Whatever ``create_agent`` returns for the resolved class name.
+
+    Raises:
+        ImportError: Re-raised from ``create_agent`` with the names that
+            ``get_available_agents`` flags as available added to the message.
+    """
     config = copy.deepcopy(cfg[agent_name])
     cls_name = config.pop("class", "SmolAgentsAgent")
-    if cls_name not in ["SmolAgentsAgent", "smolagents"]:
-        raise ValueError(f"Unsupported agent class: {cls_name}")
-    return SmolAgentsAgent(**config)
+
+    # "smolagents" is the legacy spelling of "SmolAgentsAgent".
+    cls_name = "SmolAgentsAgent" if cls_name == "smolagents" else cls_name
+
+    try:
+        return create_agent(cls_name, **config)
+    except ImportError as exc:
+        # Name only the agents flagged as available to guide the user.
+        usable = [name for name, ok in get_available_agents().items() if ok]
+        usable_text = ", ".join(usable) if usable else "None"
+        raise ImportError(
+            f"Failed to create agent '{cls_name}'. "
+            f"Required dependencies may not be installed.\n"
+            f"Available agents: {usable_text}\n"
+            f"Error: {exc}"
+        ) from exc
 
 
 def _run_one_sample(

diff --git a/scieval/agents/EarthLink/agent/data_check.py b/scieval/agents/EarthLink/agent/data_check.py
@@ -0,0 +1,81 @@
+import json
+from .. import config as CFG
+from ..tools.data import ALL_DATA_TOOLS
+from ..utils.common import extract_code_blocks
+from ..utils.agent import Agent
+
+
+def create_data_check_agent(logger=None):
+    """Build the "Data Check Agent" that judges whether available data meets a request."""
+    instructions = (
+        "You are a geoscience experiment agent. "
+        "You understand the user's needs and call related functions to confirm current available CMIP data and observational data information. "
+        "Then judge whether the current available data can meet the user's needs. \n"
+        "## Note: \n"
+        "1. Do not guess the available data information, you should call the function to obtain the available data information.\n"
+        "2. Only judge based on data information obtained from the function, we can not use any external web data.\n"
+        "3. You can use the corresponding function tools to find the variable name abbreviations in CMIP, CMIP model names, names of variable that can be derived, avaiable reference datasets, etc.\n"
+        "(note that the variable names in the observation datasets have been processed to be the same as those in CMIP). \n"
+        "4. If it can or partially can (for example, there are multiple solutions in the plan, and some solutions can be met), the check passes and the reason can be concise.\n"
+        "If it cannot (for example, the necessary observation data is missing or all solutions in all plans cannot be met), the check fails and you need to give as detailed a reason as possible.\n"
+        "## Ouput format:\n"
+        "```json\n{\"pass\": true or false, \"reason\": \"...\"}\n```"
+    )
+
+    # Model settings and the iteration cap come from the package config.
+    return Agent(
+        name="Data Check Agent",
+        logger=logger,
+        tools=ALL_DATA_TOOLS,
+        system_prompt=instructions,
+        model_settings=CFG.DATA_CHECK_MODEL_SETTING,
+        max_agent_iterations=CFG.DATA_CHECK_MAX_AGENT_ITERS,
+    )
+
+
+async def chat_data_check_agent(run_info: dict, save_round: int = 0) -> dict:
+    """Ask the data check agent whether the current plan's data needs can be met.
+
+    Reads "user_request", "experiment_plan", "logger" and "root" from
+    ``run_info``. A reply that is not valid JSON or lacks the required keys is
+    sent back to the agent together with the error, for at most 20 attempts.
+
+    Returns:
+        The parsed JSON reply: has "pass", and "reason" whenever "pass" is falsy.
+
+    Raises:
+        RuntimeError: If no valid reply is obtained within the attempt limit.
+    """
+    user_request = run_info["user_request"]
+    experiment_plan = run_info["experiment_plan"]
+    data_check_agent = create_data_check_agent(logger=run_info['logger'])
+    data_check_input = (
+        f"<user_request>\n\n{user_request}\n\n</user_request>\n\n"
+        f"<experiment_plan>\n\n{experiment_plan}\n\n</experiment_plan>\n\n"
+        "Now, please judge whether the available data can meet the user's needs. "
+    )
+
+    max_try = 20
+    for _ in range(max_try):
+        result = await data_check_agent.chat(data_check_input)
+        try:
+            # Prefer a fenced ```json block; otherwise parse the whole reply.
+            code = extract_code_blocks(result.content, language='json')
+            output = json.loads(code if code is not None else result.content)
+            if "pass" not in output:
+                raise ValueError("Output JSON must contain 'pass' key.")
+            if (not output['pass']) and ("reason" not in output):
+                raise ValueError("If 'pass' is false, output JSON must contain 'reason' key.")
+            break
+        except Exception as e:
+            # Feed the failure back so the next attempt can correct itself.
+            data_check_input = (
+                f"Output is not a valid JSON or does not contain the required keys. Error: \n{e}\n"
+                "Please output in the following format:\n"
+                "```json\n{\"pass\": true or false, \"reason\": \"...\"}\n```"
+            )
+    else:
+        raise RuntimeError(f"Failed to get valid JSON output from the data check agent after {max_try} tries.")
+    data_check_agent.save_messages(f"{run_info['root']}/agent_logs/data_check_agent_round_{save_round}.json")
+
+    return output
diff --git a/scieval/agents/EarthLink/agent/plan.py b/scieval/agents/EarthLink/agent/plan.py
@@ -0,0 +1,177 @@
+import os
+import asyncio
+
+from .. import config as CFG
+from ..tools.web_search import web_search
+from ..tools.data import ALL_DATA_TOOLS
+from ..utils.agent import Agent
+
+from .data_check import chat_data_check_agent
+
+
+# Numbered notes shared by the plan agent and plan aggregation agent prompts.
+PLAN_PROMPT_NOTE = (
+    "## Note: \n"
+    "1. Experiments can only use CMIP (Coupled Model Intercomparison Project Phase) datasets and some observation datasets.\n"
+    "2. The datasets used should be based on the user's request.\n"
+    "3. It is not mandatory to use CMIP data. If there is no special instruction and the observation data can meet the requirements, the observation data should be used.\n"
+    "4. If the required observation datasets are not available, you can use CMIP datasets instead. \n"
+    "5. You can use the web search tool to search for relevant scientific definitions and calculation steps.\n"
+    "6. You can use corresponding function tools to find the available cmip data, available observation data, CMIP model names, variable name abbreviations in CMIP, names of variable that can be derived, etc. "
+    "(note that the variable names in the observation datasets have been processed to be the same as those in CMIP). \n"
+    "7. If the variables required to complete the user task do not exist in CMIP, the corresponding variables should be calculated using existing variables.\n"
+    "8. If not specified, monthly data is preferred.\n"
+    "9. The plan should be as detailed and specific as possible, such as the time period, variable name, unit, etc. of the data used. "
+    "However, it should not include path names, specific parameter configurations (such as color map or line thickness for plotting, etc.), specific execution operations or codes (such as what packages are used for data processing and plotting), etc.\n"
+    "10. The plan should not include data download, sensitivity experiments, reproducibility, documentation, or future considerations.\n"
+    "11. The plan cannot contain any preconceived conclusions. \n"
+)
+PLAN_NOTE_CNT = 11  # number of notes above; each prompt appends item PLAN_NOTE_CNT+1
+
+# Tools handed to both planning agents: web search plus every data tool.
+PLAN_AGENT_TOOLS = [
+    web_search
+] + ALL_DATA_TOOLS
+
+
+def create_plan_agent(logger=None):
+    """Build the "Plan Agent" that drafts an experimental plan for a user request."""
+    # The shared notes are numbered 1..PLAN_NOTE_CNT, so the agent-specific
+    # closing instruction is numbered PLAN_NOTE_CNT+1.
+    system_prompt = (
+        "You are a geoscience experiment agent who is good at experiment planning. "
+        "You understand the user's needs and output the corresponding experimental plan. "
+        "Your plan should include what data to use, what preprocessing needs to be done on the data, what calculations to perform, what kind of figures to draw, etc. \n"
+        f"{PLAN_PROMPT_NOTE}"
+        f"{PLAN_NOTE_CNT+1}. Output the plan directly, don't output anything else."
+    )
+
+    return Agent(
+        name="Plan Agent",
+        logger=logger,
+        verbose=False,
+        tools=PLAN_AGENT_TOOLS,
+        system_prompt=system_prompt,
+        model_settings=CFG.PLAN_MODEL_SETTING,
+        max_agent_iterations=CFG.PLAN_MAX_AGENT_ITERS,
+    )
+
+
+def create_plan_aggregation_agent(logger=None):
+    """Build the "Plan Aggregation Agent" that checks plans and outputs an improved one."""
+    system_prompt = (
+        "You are a geoscience experiment agent who is good at checking and making experimental plans. "
+        "You understand the user's needs and check the rationality and feasibility of the user's experimental plans and provide an improved plan. \n"
+        f"{PLAN_PROMPT_NOTE}"
+        f"{PLAN_NOTE_CNT+1}. Directly output your improved complete experimental plan, don't output anything else."
+    )
+
+    # Same tools and iteration cap (CFG.PLAN_MAX_AGENT_ITERS) as the plan
+    # agent; only the model settings are aggregation-specific.
+    return Agent(
+        name="Plan Aggregation Agent",
+        logger=logger,
+        tools=PLAN_AGENT_TOOLS,
+        system_prompt=system_prompt,
+        model_settings=CFG.PLAN_AGGREGATION_MODEL_SETTING,
+        max_agent_iterations=CFG.PLAN_MAX_AGENT_ITERS,
+    )
+
+
+async def _single_plan(run_info, user_request, idx, plan_templates):
+    """Draft plan number ``idx``, save it with the agent's message log, and return it.
+
+    Writes ``experiment_plans/plan_{idx}.md`` and ``agent_logs/plan_agent_{idx}.json``
+    under ``run_info['root']``; non-None ``plan_templates`` are prepended as references.
+    """
+    plan_input = f"<user_request>\n{user_request}\n</user_request>\n\n"
+    if plan_templates is not None:
+        plan_input = (
+            f"<some_reference_plans>\n"
+            f"{plan_templates}\n\n"
+            f"</some_reference_plans>\n\n"
+        ) + plan_input + (
+            f"The above content starts with some reference experimental plans for other possible similar tasks, "
+            "followed by the current user's request. \n"
+            f"Please provide an experimental plan according to the user's request.\n"
+        )
+    else:
+        plan_input += (
+            f"The above is the user's request. \n"
+            f"Please provide an experimental plan according to the user's request.\n"
+        )
+
+    # Pass the run's logger, as is done for the other agents of this run.
+    plan_agent = create_plan_agent(logger=run_info['logger'])
+    result = await plan_agent.chat(plan_input)
+    cur_plan = result.content
+
+    with open(f"{run_info['root']}/experiment_plans/plan_{idx}.md", "w", encoding='utf-8') as f:
+        f.write(cur_plan)
+    plan_agent.save_messages(f"{run_info['root']}/agent_logs/plan_agent_{idx}.json")
+
+    return cur_plan
+
+
+async def chat_plan_agent(run_info: dict):
+    """Draft candidate plans concurrently, merge them, and data-check the result.
+
+    ``CFG.MAX_PLANS`` plan agents run concurrently; the aggregation agent then
+    merges and revises their plans until the data check agent passes one.
+    Stores the plan in ``run_info["experiment_plan"]``, writes it to
+    ``experiment_plans/final_plan.md`` and returns the same ``run_info``.
+
+    Raises:
+        RuntimeError: If plan generation fails, or if the data check still
+            fails after ``CFG.MAX_PLAN_DEBUG_ROUND`` rounds.
+    """
+    user_request = run_info["user_request"]
+    logger = run_info['logger']
+    root = run_info['root']
+
+    os.makedirs(f"{root}/experiment_plans", exist_ok=True)
+
+    plan_templates = None
+    tasks = [
+        _single_plan(run_info, user_request, i, plan_templates)
+        for i in range(CFG.MAX_PLANS)
+    ]
+    try:
+        plan_list = await asyncio.gather(*tasks, return_exceptions=False)
+    except Exception as e:
+        # Chain the cause so the original traceback stays attached.
+        raise RuntimeError(f"Error in plan generation: {e}") from e
+
+    experiment_plan_strings = "".join(
+        f"\n\n{'-'*5} Begin of plan {i} {'-'*5}\n\n"
+        f"{plan}"
+        f"\n\n{'-'*5} End of plan {i} {'-'*5}\n\n"
+        for i, plan in enumerate(plan_list)
+    )
+    plan_aggregation_input = (
+        f"<user_request>\n\n{user_request}\n\n</user_request>\n\n"
+        f"<experimental_plans>\n\n"f"{experiment_plan_strings}\n\n</experimental_plans>\n\n"
+        "The above are the user's request and some experimental plans.\n"
+        "Now, please provide an improved experimental plan according to the user's request."
+    )
+
+    plan_aggregation_agent = create_plan_aggregation_agent(logger=logger)
+    for debug_round in range(CFG.MAX_PLAN_DEBUG_ROUND):
+        result = await plan_aggregation_agent.chat(plan_aggregation_input)
+        plan_aggregation_agent.save_messages(f"{root}/agent_logs/plan_aggregation_agent_round_{debug_round}.json")
+        run_info["experiment_plan"] = result.content
+        check_result = await chat_data_check_agent(run_info, save_round=debug_round)
+        if check_result['pass']:
+            break
+        plan_aggregation_input = (
+            f"{check_result['reason']}\n\n"
+            "The data availability check failed due to the above reasons. "
+            "You can use corresponding function tools to confirm the data information. "
+            "Please provide a modified experimental plan."
+        )
+    else:
+        raise RuntimeError(f"Data availablility check failed after maximum plan debug rounds ({CFG.MAX_PLAN_DEBUG_ROUND}).")
+    with open(f"{root}/experiment_plans/final_plan.md", "w", encoding='utf-8') as f:
+        f.write(result.content)
+
+    return run_info
diff --git a/scieval/agents/EarthLink/config.py b/scieval/agents/EarthLink/config.py
@@ -0,0 +1,24 @@
+API_KEY = None
+BASE_URL = None
+
+EMBEDDING_MODEL = "Qwen/Qwen3-Embedding-8B"
+EMBEDDING_API_KEY = None
+EMBEDDING_BASE_URL = None
+
+TAVILY_API_KEY = None
+
+DEFAULT_MODEL = "gpt-5"
+DEFAULT_MODEL_PROVIDER = "openai"
+
+
+MAX_PLANS = 3  # number of candidate plans drafted concurrently before aggregation
+PLAN_MODEL = None
+PLAN_MODEL_SETTING = {}  # model_settings of the Plan Agent
+PLAN_MAX_AGENT_ITERS = 60  # max_agent_iterations of the plan and plan aggregation agents
+
+PLAN_AGGREGATION_MODEL_SETTING = {}  # model_settings of the Plan Aggregation Agent
+
+DATA_CHECK_MODEL_SETTING = {"reasoning_effort": "low"}  # model_settings of the Data Check Agent
+DATA_CHECK_MAX_AGENT_ITERS = 60  # max_agent_iterations of the Data Check Agent
+
+MAX_PLAN_DEBUG_ROUND = 2  # aggregate-then-data-check rounds allowed before RuntimeError