-
Notifications
You must be signed in to change notification settings - Fork 7
Implement lazy loading of datasets and agents & add EarthLink agent #6
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: feature/agent-framework
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -7,8 +7,8 @@ | |
|
|
||
| from tqdm import tqdm | ||
|
|
||
| from scieval.agents import create_agent, get_available_agents | ||
| from scieval.agents.records import EvalRecord, TrajectoryStore | ||
| from scieval.agents.smolagents import SmolAgentsAgent | ||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The import |
||
| from scieval.dataset import build_dataset | ||
| from scieval.smp import dump, get_logger, load, timestr, githash, ls | ||
|
|
||
|
|
@@ -32,11 +32,38 @@ def _build_dataset_from_config(cfg: Dict[str, Any], dataset_name: str): | |
|
|
||
|
|
||
def _build_agent_from_config(cfg: Dict[str, Any], agent_name: str):
    """
    Build an agent from configuration.

    The entry ``cfg[agent_name]`` is deep-copied, its optional ``"class"``
    key selects the agent (default ``"SmolAgentsAgent"``), and the remaining
    keys are forwarded as keyword arguments to ``create_agent``.

    Args:
        cfg: Configuration dictionary
        agent_name: Name of the agent in the config

    Returns:
        Agent instance

    Raises:
        ValueError: If agent class is not supported
            (NOTE(review): presumably raised by ``create_agent`` for unknown
            names - confirm against the agent registry)
        ImportError: If agent dependencies are not installed
    """
    # Work on a copy so popping "class" never mutates the caller's config.
    config = copy.deepcopy(cfg[agent_name])
    cls_name = config.pop("class", "SmolAgentsAgent")

    # Handle legacy name mapping
    if cls_name == "smolagents":
        cls_name = "SmolAgentsAgent"

    # Resolve the agent through the registry. The former hard-coded
    # SmolAgentsAgent check and eager return are gone: they made this path
    # unreachable and rejected every other agent.
    try:
        return create_agent(cls_name, **config)
    except ImportError as e:
        available = get_available_agents()
        available_list = [name for name, avail in available.items() if avail]
        raise ImportError(
            f"Failed to create agent '{cls_name}'. "
            f"Required dependencies may not be installed.\n"
            f"Available agents: {', '.join(available_list) if available_list else 'None'}\n"
            f"Error: {e}"
        ) from e
|
|
||
|
|
||
| def _run_one_sample( | ||
|
|
||
| Original file line number | Diff line number | Diff line change | ||||||||
|---|---|---|---|---|---|---|---|---|---|---|
| @@ -0,0 +1,81 @@ | ||||||||||
| import json | ||||||||||
| from .. import config as CFG | ||||||||||
| from ..tools.data import ALL_DATA_TOOLS | ||||||||||
| from ..utils.common import extract_code_blocks | ||||||||||
| from ..utils.agent import Agent | ||||||||||
|
|
||||||||||
|
|
||||||||||
def create_data_check_agent(logger=None):
    """Create the agent that judges whether available data can serve a request.

    The agent gets the tools in ``ALL_DATA_TOOLS`` and a system prompt that
    tells it to reply with a JSON object of the form
    ``{"pass": true or false, "reason": "..."}``.

    Args:
        logger: Optional logger, forwarded unchanged to ``Agent``.

    Returns:
        A new ``Agent`` named "Data Check Agent".
    """
    # The prompt text is consumed by the model at runtime; keep it verbatim.
    prompt_parts = (
        "You are a geoscience experiment agent. ",
        "You understand the user's needs and call related functions to confirm current available CMIP data and observational data information. ",
        "Then judge whether the current available data can meet the user's needs. \n",
        "## Note: \n",
        "1. Do not guess the available data information, you should call the function to obtain the available data information.\n",
        "2. Only judge based on data information obtained from the function, we can not use any external web data.\n",
        "3. You can use the corresponding function tools to find the variable name abbreviations in CMIP, CMIP model names, names of variable that can be derived, avaiable reference datasets, etc.\n",
        "(note that the variable names in the observation datasets have been processed to be the same as those in CMIP). \n",
        "4. If it can or partially can (for example, there are multiple solutions in the plan, and some solutions can be met), the check passes and the reason can be concise.\n",
        "If it cannot (for example, the necessary observation data is missing or all solutions in all plans cannot be met), the check fails and you need to give as detailed a reason as possible.\n",
        "## Ouput format:\n",
        "```json\n{\"pass\": true or false, \"reason\": \"...\"}\n```",
    )
    system_prompt = "".join(prompt_parts)

    return Agent(
        name="Data Check Agent",
        model_settings=CFG.DATA_CHECK_MODEL_SETTING,
        system_prompt=system_prompt,
        tools=ALL_DATA_TOOLS,
        max_agent_iterations=CFG.DATA_CHECK_MAX_AGENT_ITERS,
        logger=logger,
    )
|
|
||||||||||
|
|
||||||||||
async def chat_data_check_agent(run_info: dict, save_round: int = 0) -> dict:
    """Ask the data check agent whether the current plan can be served.

    A fresh data check agent receives the user request and the experiment
    plan and is re-prompted until its reply parses as a JSON object with a
    ``"pass"`` key (plus a ``"reason"`` key whenever ``"pass"`` is falsy).

    Args:
        run_info: Run state; this function reads the keys ``"user_request"``,
            ``"experiment_plan"``, ``"logger"`` and ``"root"``.
        save_round: Index used in the name of the saved message log.

    Returns:
        The parsed JSON object produced by the agent.

    Raises:
        RuntimeError: If no valid reply is obtained within 20 attempts.
    """
    user_request = run_info["user_request"]
    experiment_plan = run_info["experiment_plan"]
    logger = run_info['logger']

    data_check_agent = create_data_check_agent(logger=logger)

    data_check_input = (
        f"<user_request>\n\n{user_request}\n\n</user_request>\n\n"
        f"<experiment_plan>\n\n{experiment_plan}\n\n</experiment_plan>\n\n"
        "Now, please judge whether the available data can meet the user's needs. "
    )

    max_try = 20
    for _ in range(max_try):
        result = await data_check_agent.chat(data_check_input)

        try:
            # Prefer a fenced ```json block; fall back to the raw reply.
            code = extract_code_blocks(result.content, language='json')
            output = json.loads(code if code is not None else result.content)

            if not isinstance(output, dict):
                raise ValueError("Output JSON must be an object.")
            # Only 'pass' is mandatory; 'reason' is required on failure only,
            # so the message must not claim both are always needed.
            if "pass" not in output:
                raise ValueError("Output JSON must contain the 'pass' key.")
            if (not output['pass']) and ("reason" not in output):
                raise ValueError("If 'pass' is false, output JSON must contain 'reason' key.")
            break
        except Exception as e:
            # Deliberately broad: any parsing/validation problem is fed back
            # to the agent as the next prompt instead of aborting the run.
            data_check_input = (
                f"Output is not a valid JSON or does not contain the required keys. Error: \n{e}\n"
                "Please output in the following format:\n"
                "```json\n{\"pass\": true or false, \"reason\": \"...\"}\n```"
            )
    else:
        raise RuntimeError(
            f"Failed to get valid JSON output from the data check agent after {max_try} tries."
        )

    data_check_agent.save_messages(f"{run_info['root']}/agent_logs/data_check_agent_round_{save_round}.json")

    return output
| Original file line number | Diff line number | Diff line change | ||||
|---|---|---|---|---|---|---|
| @@ -0,0 +1,177 @@ | ||||||
| import os | ||||||
| import asyncio | ||||||
|
|
||||||
| from .. import config as CFG | ||||||
| from ..tools.web_search import web_search | ||||||
| from ..tools.data import ALL_DATA_TOOLS | ||||||
| from ..utils.agent import Agent | ||||||
|
|
||||||
| from .data_check import chat_data_check_agent | ||||||
|
|
||||||
|
|
||||||
# For both plan and plan check agents
# One entry per numbered note. The "N. " prefixes and PLAN_NOTE_CNT are
# generated from this tuple so that adding or removing a note cannot leave
# the numbering or the count out of sync.
_PLAN_NOTES = (
    "Experiments can only use CMIP (Coupled Model Intercomparison Project Phase) datasets and some observation datasets.\n",
    "The datasets used should be based on the user's request.\n",
    "It is not mandatory to use CMIP data. If there is no special instruction and the observation data can meet the requirements, the observation data should be used.\n",
    "If the required observation datasets are not available, you can use CMIP datasets instead. \n",
    "You can use the web search tool to search for relevant scientific definitions and calculation steps.\n",
    (
        "You can use corresponding function tools to find the available cmip data, available observation data, CMIP model names, variable name abbreviations in CMIP, names of variable that can be derived, etc. "
        "(note that the variable names in the observation datasets have been processed to be the same as those in CMIP). \n"
    ),
    "If the variables required to complete the user task do not exist in CMIP, the corresponding variables should be calculated using existing variables.\n",
    "If not specified, monthly data is preferred.\n",
    (
        "The plan should be as detailed and specific as possible, such as the time period, variable name, unit, etc. of the data used. "
        "However, it should not include path names, specific parameter configurations (such as color map or line thickness for plotting, etc.), specific execution operations or codes (such as what packages are used for data processing and plotting), etc.\n"
    ),
    "The plan should not include data download, sensitivity experiments, reproducibility, documentation, or future considerations.\n",
    "The plan cannot contain any preconceived conclusions. \n",
)

# Shared "## Note:" section of the planning prompts.
PLAN_PROMPT_NOTE = "## Note: \n" + "".join(
    f"{number}. {text}" for number, text in enumerate(_PLAN_NOTES, start=1)
)

# Number of notes above; prompt builders append their own final rule as
# item PLAN_NOTE_CNT + 1.
PLAN_NOTE_CNT = len(_PLAN_NOTES)
|
|
||||||
|
|
||||||
# Tool set handed to the planning agents: web search followed by every
# entry of ALL_DATA_TOOLS.
PLAN_AGENT_TOOLS = [
    web_search
] + ALL_DATA_TOOLS
|
|
||||||
|
|
||||||
def create_plan_agent(logger=None):
    """Create the agent that drafts a single experiment plan.

    The system prompt is a role description, then the shared
    ``PLAN_PROMPT_NOTE`` section, then one extra numbered rule telling the
    model to output only the plan.

    Args:
        logger: Optional logger, forwarded unchanged to ``Agent``.

    Returns:
        A new ``Agent`` named "Plan Agent", created with ``verbose=False``.
    """
    role_description = (
        "You are a geoscience experiment agent who is good at experiment planning. "
        "You understand the user's needs and output the corresponding experimental plan. "
        "Your plan should include what data to use, what preprocessing needs to be done on the data, what calculations to perform, what kind of figures to draw, etc. \n"
    )
    # Continues the numbered list that PLAN_PROMPT_NOTE ends with.
    final_rule = f"{PLAN_NOTE_CNT+1}. Output the plan directly, don't output anything else."
    system_prompt = role_description + f"{PLAN_PROMPT_NOTE}" + final_rule

    return Agent(
        name="Plan Agent",
        model_settings=CFG.PLAN_MODEL_SETTING,
        system_prompt=system_prompt,
        tools=PLAN_AGENT_TOOLS,
        max_agent_iterations=CFG.PLAN_MAX_AGENT_ITERS,
        logger=logger,
        verbose=False,
    )
|
|
||||||
|
|
||||||
def create_plan_aggregation_agent(logger=None):
    """Create the agent that reviews candidate plans and writes an improved one.

    The system prompt is a role description, then the shared
    ``PLAN_PROMPT_NOTE`` section, then one extra numbered rule telling the
    model to output only the improved plan.

    Args:
        logger: Optional logger, forwarded unchanged to ``Agent``.

    Returns:
        A new ``Agent`` named "Plan Aggregation Agent".
    """
    role_description = (
        "You are a geoscience experiment agent who is good at checking and making experimental plans. "
        "You understand the user's needs and check the rationality and feasibility of the user's experimental plans and provide an improved plan. \n"
    )
    # Continues the numbered list that PLAN_PROMPT_NOTE ends with.
    final_rule = (
        f"{PLAN_NOTE_CNT+1}. Directly output your improved complete experimental plan, don't output anything else."
    )
    system_prompt = role_description + f"{PLAN_PROMPT_NOTE}" + final_rule

    return Agent(
        name="Plan Aggregation Agent",
        model_settings=CFG.PLAN_AGGREGATION_MODEL_SETTING,
        system_prompt=system_prompt,
        tools=PLAN_AGENT_TOOLS,
        max_agent_iterations=CFG.PLAN_MAX_AGENT_ITERS,
        logger=logger,
    )
|
|
||||||
|
|
||||||
async def _single_plan(run_info, user_request, idx, plan_templates):
    """Generate one candidate experiment plan and persist it.

    Args:
        run_info: Run state dict; ``"root"`` is read for output paths and
            ``"logger"`` (if present) is forwarded to the plan agent.
        user_request: The user's request text.
        idx: Index of this candidate, used in the output file names.
        plan_templates: Optional reference plans placed before the request
            in the prompt; ``None`` to omit them.

    Returns:
        The plan text returned by the plan agent.
    """
    request_section = f"<user_request>\n{user_request}\n</user_request>\n\n"

    if plan_templates is not None:
        plan_input = (
            "<some_reference_plans>\n"
            f"{plan_templates}\n\n"
            "</some_reference_plans>\n\n"
        ) + request_section + (
            "The above content starts with some reference experimental plans for other possible similar tasks, "
            "followed by the current user's request. \n"
            "Please provide an experimental plan according to the user's request.\n"
        )
    else:
        plan_input = request_section + (
            "The above is the user's request. \n"
            "Please provide an experimental plan according to the user's request.\n"
        )

    # Forward the run's logger, as the aggregation and data check agents do.
    # .get() keeps callers whose run_info has no "logger" key working.
    plan_agent = create_plan_agent(logger=run_info.get("logger"))

    result = await plan_agent.chat(plan_input)
    cur_plan = result.content

    with open(f"{run_info['root']}/experiment_plans/plan_{idx}.md", "w", encoding='utf-8') as f:
        f.write(cur_plan)

    plan_agent.save_messages(f"{run_info['root']}/agent_logs/plan_agent_{idx}.json")

    return cur_plan
|
|
||||||
|
|
||||||
async def chat_plan_agent(run_info: dict):
    """Produce the final experiment plan for a run.

    Steps:
      1. Generate ``CFG.MAX_PLANS`` candidate plans concurrently.
      2. Ask the plan aggregation agent for an improved plan built from them.
      3. Run the data check agent on that plan; on failure, feed the reason
         back to the aggregation agent and retry, for at most
         ``CFG.MAX_PLAN_DEBUG_ROUND`` rounds.
      4. Write the accepted plan to ``experiment_plans/final_plan.md``.

    Args:
        run_info: Run state; reads ``"user_request"``, ``"logger"`` and
            ``"root"``, and sets ``"experiment_plan"`` to the latest plan.

    Returns:
        The same ``run_info`` dict, with ``"experiment_plan"`` set.

    Raises:
        RuntimeError: If candidate generation fails, or if the data check
            still fails after the maximum number of rounds.
    """
    user_request = run_info["user_request"]
    logger = run_info['logger']
    root = run_info['root']

    os.makedirs(f"{root}/experiment_plans", exist_ok=True)

    plan_templates = None
    tasks = [
        _single_plan(run_info, user_request, i, plan_templates)
        for i in range(CFG.MAX_PLANS)
    ]

    try:
        # gather() returns one result per awaitable, in submission order.
        plan_list = await asyncio.gather(*tasks, return_exceptions=False)
    except Exception as e:
        # Chain explicitly so the original traceback stays attached.
        raise RuntimeError(f"Error in plan generation: {e}") from e

    rule = '-' * 5
    experiment_plan_strings = "".join(
        f"\n\n{rule} Begin of plan {i} {rule}\n\n"
        f"{plan}"
        f"\n\n{rule} End of plan {i} {rule}\n\n"
        for i, plan in enumerate(plan_list)
    )

    plan_aggregation_input = (
        f"<user_request>\n\n{user_request}\n\n</user_request>\n\n"
        f"<experimental_plans>\n\n{experiment_plan_strings}\n\n</experimental_plans>\n\n"
        "The above are the user's request and some experimental plans.\n"
        "Now, please provide an improved experimental plan according to the user's request."
    )

    plan_aggregation_agent = create_plan_aggregation_agent(logger=logger)

    for round_idx in range(CFG.MAX_PLAN_DEBUG_ROUND):
        result = await plan_aggregation_agent.chat(plan_aggregation_input)

        plan_aggregation_agent.save_messages(f"{root}/agent_logs/plan_aggregation_agent_round_{round_idx}.json")

        # The data check agent reads the plan from run_info.
        run_info["experiment_plan"] = result.content
        check_result = await chat_data_check_agent(run_info, save_round=round_idx)
        if check_result['pass']:
            break

        plan_aggregation_input = (
            f"{check_result['reason']}\n\n"
            "The data availability check failed due to the above reasons. "
            "You can use corresponding function tools to confirm the data information. "
            "Please provide a modified experimental plan."
        )
    else:
        # No round passed the data check (or no rounds were allowed at all).
        raise RuntimeError(f"Data availablility check failed after maximum plan debug rounds ({CFG.MAX_PLAN_DEBUG_ROUND}).")

    with open(f"{root}/experiment_plans/final_plan.md", "w", encoding='utf-8') as f:
        f.write(result.content)

    return run_info
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,24 @@ | ||
# API credentials and endpoint; unset (None) in this file.
# NOTE(review): presumably supplied at deployment time - confirm where.
API_KEY = None
BASE_URL = None

# Embedding model name and its own credentials/endpoint (unset here).
EMBEDDING_MODEL = "Qwen/Qwen3-Embedding-8B"
EMBEDDING_API_KEY = None
EMBEDDING_BASE_URL = None

# Key for the Tavily service (unset here).
# NOTE(review): presumably used by the web search tool - confirm.
TAVILY_API_KEY = None

# Model and provider used when nothing more specific is configured.
DEFAULT_MODEL = "gpt-5"
DEFAULT_MODEL_PROVIDER = "openai"

# Planning stage: MAX_PLANS is the number of candidate plans generated;
# PLAN_MODEL_SETTING / PLAN_MAX_AGENT_ITERS are passed to the plan agents as
# model_settings / max_agent_iterations.
MAX_PLANS = 3
PLAN_MODEL = None
PLAN_MODEL_SETTING = {}
PLAN_MAX_AGENT_ITERS = 60

# model_settings for the plan aggregation agent.
PLAN_AGGREGATION_MODEL_SETTING = {}

# model_settings / max_agent_iterations for the data check agent.
DATA_CHECK_MODEL_SETTING = {"reasoning_effort": "low"}
DATA_CHECK_MAX_AGENT_ITERS = 60

# Maximum number of aggregate-then-data-check rounds before planning fails.
MAX_PLAN_DEBUG_ROUND = 2
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The `.gitignore` entry `variables_embedding.jsonl` is added without a preceding newline, which can sometimes lead to issues with the last line not being properly ignored by some git clients or tools. It's a good practice to ensure a newline at the end of the file.