10 changes: 7 additions & 3 deletions .env.template
@@ -1,14 +1,18 @@
PYTHONPATH=.

# Currently not being used as models, but might be in the future
PERPLEXITY_API_KEY=

# As of Jan 23rd 2025, OpenAI and Exa are the most heavily used. Most bots only use a subset of these services
OPENAI_API_KEY=
EXA_API_KEY=
PERPLEXITY_API_KEY=
DEEPSEEK_API_KEY=
ASKNEWS_CLIENT_ID=
ASKNEWS_SECRET=

# Fill this in if using the Metaculus API
METACULUS_TOKEN=

# Right now only used for free semantic similarity calculation in Deduplicator, but defaults to OpenAI if not filled in
# As of Jan 23rd 2025, only used for free semantic similarity calculation in Deduplicator, but defaults to OpenAI if not filled in
HUGGINGFACE_API_KEY=

# Disable if in Streamlit Cloud
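For local runs, these variables are typically loaded from a `.env` file copied from this template. A minimal sketch, assuming `python-dotenv` (the repo's actual loading mechanism may differ):

```python
import os

from dotenv import load_dotenv  # assumes python-dotenv is installed

# Load variables from a local .env copied from .env.template
load_dotenv()

openai_key = os.getenv("OPENAI_API_KEY")
if not openai_key:
    raise RuntimeError("OPENAI_API_KEY is required; see the ReadMe")
# HUGGINGFACE_API_KEY stays optional: per the comment above, the
# Deduplicator falls back to OpenAI for semantic similarity without it.
huggingface_key = os.getenv("HUGGINGFACE_API_KEY")
```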
2 changes: 1 addition & 1 deletion .github/workflows/run-bot-aib-tournament.yaml
@@ -37,7 +37,7 @@ jobs:
run: poetry install --no-interaction --no-root
- name: Run bot
run: |
poetry run python run_bot.py --skip-previous true --tournament 32627
poetry run python run_bot.py --skip_previous True --tournament 32627
# this reads the environment variables from the github repository.
# Store under Settings --> Secrets and variables --> Actions
# Not all these variables are required. See the ReadMe for more details.
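The flag change (`--skip-previous true` → `--skip_previous True`) implies run_bot.py now expects underscore-style flags with capitalized boolean values. A hedged sketch of argument parsing that would accept this form; the `str2bool` helper is illustrative, not necessarily the repo's actual implementation:

```python
import argparse


def str2bool(value: str) -> bool:
    # Accepts "True"/"true"/"1"/"yes" as truthy CLI values.
    return value.strip().lower() in ("true", "1", "yes")


parser = argparse.ArgumentParser()
parser.add_argument("--skip_previous", type=str2bool, default=False)
parser.add_argument("--tournament", type=str, required=True)

args = parser.parse_args(["--skip_previous", "True", "--tournament", "32627"])
assert args.skip_previous is True
```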
52 changes: 52 additions & 0 deletions .github/workflows/run-bot-quarterly-cup.yaml
@@ -0,0 +1,52 @@
name: Run Bot for Metaculus Quarterly Cup

on:
workflow_dispatch:
schedule:
- cron: "0 0 */2 * *" # runs at midnight every 2 days

# Add concurrency group to prevent parallel runs
concurrency:
group: ${{ github.workflow }}
cancel-in-progress: false

# Daily job to run the forecast bot
jobs:
run_bot:
runs-on: ubuntu-latest # determines the machine that will run the job - keep as is
steps: # sets up the steps that will be run in order
# setup repository with all necessary dependencies - keep as is
- name: Check out repository
uses: actions/checkout@v3
- uses: actions/setup-python@v4
with:
python-version: "3.11"
- name: Install poetry
uses: snok/install-poetry@v1
with:
virtualenvs-create: true
virtualenvs-in-project: true
installer-parallel: true
- name: Load cached venv
id: cached-poetry-dependencies
uses: actions/cache@v4
with:
path: .venv
key: venv-${{ runner.os }}-${{ steps.setup-python.outputs.python-version }}-${{ hashFiles('**/poetry.lock') }}
- name: Install dependencies
run: poetry install --no-interaction --no-root
- name: Run bot
run: |
poetry run python run_bot.py --skip_previous False --tournament quarterly-cup
# this reads the environment variables from the github repository.
# Store under Settings --> Secrets and variables --> Actions
# Not all these variables are required. See the ReadMe for more details.
env:
METACULUS_TOKEN: ${{ secrets.METACULUS_TOKEN }}
PERPLEXITY_API_KEY: ${{ secrets.PERPLEXITY_API_KEY }}
OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
EXA_API_KEY: ${{ secrets.EXA_API_KEY }}
ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
HUGGINGFACE_API_KEY: ${{ secrets.HUGGINGFACE_API_KEY }}
CODA_API_KEY: ${{ secrets.CODA_API_KEY }}
PYTHONPATH: .
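The `0 0 */2 * *` schedule fires at midnight on every second day of the month (1st, 3rd, 5th, ...), so consecutive runs at a month boundary can be only one day apart. A quick way to sanity-check a schedule locally, assuming the third-party `croniter` package:

```python
from datetime import datetime

from croniter import croniter  # third-party; assumed available

schedule = croniter("0 0 */2 * *", datetime(2025, 1, 23))
for _ in range(3):
    print(schedule.get_next(datetime))
# -> 2025-01-25, 2025-01-27, 2025-01-29 (all at 00:00)
```

The concurrency group makes a newly scheduled run wait for (rather than cancel) any run already in progress, since `cancel-in-progress` is false.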
@@ -37,7 +37,6 @@ async def test_predicts_test_question(
question_type: type[MetaculusQuestion],
bot: ForecastBot,
) -> None:

question = ReportOrganizer.get_live_example_question_of_type(question_type)
assert isinstance(question, question_type)
target_cost_in_usd = 0.3
@@ -48,6 +47,7 @@ async def test_predicts_test_question(
expected_report_type = (
ReportOrganizer.get_report_type_for_question_type(question_type)
)
await report.publish_report_to_metaculus()
assert isinstance(report, expected_report_type)
assert cost_manager.current_usage <= target_cost_in_usd
assert len(report.report_sections) > 1
Expand All @@ -56,7 +56,6 @@ async def test_predicts_test_question(
assert report.price_estimate is not None
assert report.minutes_taken is not None
assert report.question is not None
await report.publish_report_to_metaculus()

updated_question = MetaculusApi.get_question_by_post_id(
question.id_of_post
59 changes: 44 additions & 15 deletions code_tests/low_cost_or_live_api_tests/test_metaculus_api.py
@@ -136,14 +136,25 @@ def test_questions_returned_from_list_questions() -> None:
if ForecastingTestManager.quarterly_cup_is_not_active():
pytest.skip("Quarterly cup is not active")

ai_tournament_id = (
tournament_id = (
ForecastingTestManager.TOURNAMENT_WITH_MIXTURE_OF_OPEN_AND_NOT_OPEN
)
questions = MetaculusApi.get_all_open_questions_from_tournament(
ai_tournament_id
tournament_id
)
assert len(questions) > 0
# TODO: Add a tournament ID field and assert that the tournament is the same
assert all(question.state == QuestionState.OPEN for question in questions)

quarterly_cup_slug = "quarterly-cup"
questions = MetaculusApi.get_all_open_questions_from_tournament(
quarterly_cup_slug
)
assert len(questions) > 0
assert all(
quarterly_cup_slug in question.tournament_slugs
for question in questions
)
assert all(question.state == QuestionState.OPEN for question in questions)


def test_get_questions_from_tournament() -> None:
@@ -259,11 +270,18 @@ def test_get_benchmark_questions(num_questions_to_get: int) -> None:
ApiFilter(
close_time_gt=datetime(2024, 1, 15),
close_time_lt=datetime(2024, 1, 20),
allowed_tournament_slugs=["quarterly-cup-2024q1"],
allowed_tournaments=["quarterly-cup-2024q1"],
),
1,
False,
),
(
ApiFilter(
allowed_tournaments=[32506], # Q4 AIB Metaculus Tournament
),
None,
False,
),
(
ApiFilter(
num_forecasters_gte=50,
@@ -278,13 +296,18 @@ def test_get_benchmark_questions(num_questions_to_get: int) -> None:
],
)
async def test_get_questions_from_tournament_with_filter(
api_filter: ApiFilter, num_questions: int, randomly_sample: bool
api_filter: ApiFilter, num_questions: int | None, randomly_sample: bool
) -> None:
questions = await MetaculusApi.get_questions_matching_filter(
num_questions, api_filter, randomly_sample
api_filter,
num_questions=num_questions,
randomly_sample=randomly_sample,
)
assert_questions_match_filter(questions, api_filter)
assert len(questions) == num_questions
if num_questions is not None:
assert len(questions) == num_questions
else:
assert len(questions) > 0
assert_basic_attributes_at_percentage(questions, 0.8)
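These tests reflect a signature change: `num_questions` moves to a keyword argument and may now be `None`, meaning "return everything that matches the filter". A sketch of the new call pattern (import path assumed):

```python
import asyncio

from forecasting_tools import ApiFilter, MetaculusApi  # import path assumed


async def fetch_all_quarterly_cup_questions():
    api_filter = ApiFilter(allowed_tournaments=["quarterly-cup-2024q1"])
    # num_questions=None means "no cap": return all matching questions
    return await MetaculusApi.get_questions_matching_filter(
        api_filter,
        num_questions=None,
        randomly_sample=False,
    )


questions = asyncio.run(fetch_all_quarterly_cup_questions())
```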


@@ -306,7 +329,7 @@ async def test_question_status_filters(
allowed_statuses=[state.value for state in status_filter]
)
questions = await MetaculusApi.get_questions_matching_filter(
250, api_filter, randomly_sample=True
api_filter, num_questions=250, randomly_sample=True
)
for question in questions:
assert question.state in status_filter
@@ -318,19 +341,19 @@ async def test_question_status_filters(
"api_filter, num_questions_in_tournament, randomly_sample",
[
(
ApiFilter(allowed_tournament_slugs=["quarterly-cup-2024q1"]),
ApiFilter(allowed_tournaments=["quarterly-cup-2024q1"]),
46,
False,
),
(
ApiFilter(allowed_tournament_slugs=["quarterly-cup-2024q1"]),
ApiFilter(allowed_tournaments=["quarterly-cup-2024q1"]),
46,
True,
),
(
ApiFilter(
includes_bots_in_aggregates=False,
allowed_tournament_slugs=["aibq4"],
allowed_tournaments=["aibq4"],
),
1,
False,
@@ -346,8 +369,8 @@ async def test_fails_to_get_questions_if_filter_is_too_restrictive(

with pytest.raises(Exception):
await MetaculusApi.get_questions_matching_filter(
requested_questions,
api_filter,
num_questions=requested_questions,
randomly_sample=randomly_sample,
)

@@ -523,11 +546,17 @@ def assert_questions_match_filter( # NOSONAR
question.open_time and question.open_time < filter.open_time_lt
), f"Question {question.id_of_post} opened at {question.open_time}, expected before {filter.open_time_lt}"

if filter.allowed_tournament_slugs:
if filter.allowed_tournaments and all(
isinstance(tournament, str)
for tournament in filter.allowed_tournaments
):
# TODO: Handle when an allowed tournament is an int ID rather than a slug
# TODO: As of Jan 25, 2025 you can pass in a question series slug and get back questions.
# this should be collected in the question
assert any(
slug in filter.allowed_tournament_slugs
slug in filter.allowed_tournaments
for slug in question.tournament_slugs
), f"Question {question.id_of_post} tournaments {question.tournament_slugs} not in allowed tournaments {filter.allowed_tournament_slugs}"
), f"Question {question.id_of_post} tournaments {question.tournament_slugs} not in allowed tournaments {filter.allowed_tournaments}"

if filter.community_prediction_exists is not None:
assert filter.allowed_types == [
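The rename from `allowed_tournament_slugs` to `allowed_tournaments` widens the field to accept integer tournament IDs as well as string slugs, but per the TODOs above, matching can only be verified for slug entries today, since questions expose `tournament_slugs` and not IDs. A sketch of what the widened filter enables (values taken from the tests above; `ApiFilter` import path assumed):

```python
from forecasting_tools import ApiFilter  # import path assumed

# Slugs and numeric IDs now share one field.
slug_filter = ApiFilter(allowed_tournaments=["quarterly-cup-2024q1"])
id_filter = ApiFilter(allowed_tournaments=[32506])  # Q4 AIB tournament


def matches_slug_filter(question, allowed: list[str]) -> bool:
    # Mirrors the assert above: only slug entries are checkable for now.
    return any(slug in allowed for slug in question.tournament_slugs)
```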
2 changes: 1 addition & 1 deletion forecasting_tools/ai_models/exa_searcher.py
@@ -82,7 +82,7 @@ class ExaSearcher(
RequestLimitedModel, RetryableModel, TimeLimitedModel, IncursCost
):
REQUESTS_PER_PERIOD_LIMIT = (
4 # For rate limits see https://docs.exa.ai/reference/rate-limits
3 # For rate limits see https://docs.exa.ai/reference/rate-limits
)
REQUEST_PERIOD_IN_SECONDS = 1
TIMEOUT_TIME = 30
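The Exa request limit drops from 4 to 3 per one-second period, adding headroom under the documented rate limits. A minimal sketch of the fixed-window throttling these class constants imply (illustrative only; the real logic lives in `RequestLimitedModel`):

```python
import asyncio
import time


class SimpleRateLimiter:
    """Allow at most `limit` calls per `period` seconds (fixed window)."""

    def __init__(self, limit: int = 3, period: float = 1.0) -> None:
        self.limit = limit
        self.period = period
        self.window_start = time.monotonic()
        self.calls_in_window = 0

    async def acquire(self) -> None:
        now = time.monotonic()
        if now - self.window_start >= self.period:
            # New window: reset the counter
            self.window_start = now
            self.calls_in_window = 0
        if self.calls_in_window >= self.limit:
            # Window full: wait out the remainder, then start fresh
            await asyncio.sleep(self.period - (now - self.window_start))
            self.window_start = time.monotonic()
            self.calls_in_window = 0
        self.calls_in_window += 1


limiter = SimpleRateLimiter(limit=3, period=1.0)  # mirrors the new constants
```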
6 changes: 4 additions & 2 deletions forecasting_tools/ai_models/model_archetypes/general_llm.py
@@ -88,6 +88,7 @@ async def _invoke_with_request_cost_time_and_token_limits_and_retry(
self, *args, **kwargs
) -> Any:
logger.debug(f"Invoking model with args: {args} and kwargs: {kwargs}")
MonetaryCostManager.raise_error_if_limit_would_be_reached()
direct_call_response = await self._mockable_direct_call_to_model(
*args, **kwargs
)
@@ -97,6 +98,8 @@ async def _invoke_with_request_cost_time_and_token_limits_and_retry(
else direct_call_response
)
logger.debug(f"Model responded with: {response_to_log}...")
cost = direct_call_response.cost
MonetaryCostManager.increase_current_usage_in_parent_managers(cost)
return direct_call_response

@classmethod
@@ -122,7 +125,7 @@ async def _mockable_direct_call_to_model(
choices = typeguard.check_type(choices, list[Choices])
answer = choices[0].message.content
assert isinstance(answer, str)
usage = response.usage
usage = response.usage # type: ignore
assert isinstance(usage, Usage)
prompt_tokens = usage.prompt_tokens
completion_tokens = usage.completion_tokens
Expand All @@ -133,7 +136,6 @@ async def _mockable_direct_call_to_model(
] # If this has problems, consider using the budgetmanager class
if cost is None:
cost = 0
MonetaryCostManager.increase_current_usage_in_parent_managers(cost)

return TextTokenCostResponse(
data=answer,
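This moves cost accounting out of `_mockable_direct_call_to_model` and into the retry wrapper, and adds a pre-call budget check so a request that would exceed the limit fails before any money is spent. A hedged sketch of the resulting control flow (the manager method names are from the diff; everything else is simplified):

```python
async def _invoke_with_limits_and_retry(self, *args, **kwargs):
    # Fail fast if the cost limit is already reached, before calling out.
    MonetaryCostManager.raise_error_if_limit_would_be_reached()

    response = await self._mockable_direct_call_to_model(*args, **kwargs)

    # Accounting now lives in the wrapper rather than inside the direct
    # call, so usage is recorded uniformly whether the underlying call is
    # real or mocked (both return a response carrying a `cost`).
    MonetaryCostManager.increase_current_usage_in_parent_managers(response.cost)
    return response
```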
10 changes: 4 additions & 6 deletions forecasting_tools/ai_models/perplexity.py
@@ -1,18 +1,16 @@
from typing import Final

from forecasting_tools.ai_models.model_archetypes.perplexity_text_model import (
PerplexityTextModel,
from forecasting_tools.ai_models.model_archetypes.general_llm import (
GeneralTextToTextLlm,
)


class Perplexity(PerplexityTextModel):
MODEL_NAME: Final[str] = "llama-3.1-sonar-huge-128k-online"
class Perplexity(GeneralTextToTextLlm):
MODEL_NAME: Final[str] = "perplexity/sonar-pro"
REQUESTS_PER_PERIOD_LIMIT: Final[int] = (
40 # Technically 50, but giving wiggle room
)
REQUEST_PERIOD_IN_SECONDS: Final[int] = 60
TIMEOUT_TIME: Final[int] = 120
TOKENS_PER_PERIOD_LIMIT: Final[int] = 2000000
TOKEN_PERIOD_IN_SECONDS: Final[int] = 60
PRICE_PER_TOKEN: Final[float] = 0.000005
PRICE_PER_REQUEST: Final[float] = 0.005
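Perplexity now rides on the shared `GeneralTextToTextLlm` archetype with a litellm-style `provider/model` name instead of the bespoke `PerplexityTextModel`; the per-token and per-request price constants disappear, presumably because the general path derives cost from the response itself. A hedged usage sketch (`invoke` is an assumed entry point mirroring the repo's other model classes):

```python
import asyncio

from forecasting_tools.ai_models.perplexity import Perplexity  # path per diff

model = Perplexity()  # MODEL_NAME resolves to "perplexity/sonar-pro"
# `invoke` is assumed; check the GeneralTextToTextLlm interface for the
# actual public method.
answer = asyncio.run(model.invoke("Summarize this week's forecasting news."))
print(answer)
```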
8 changes: 5 additions & 3 deletions forecasting_tools/forecasting/forecast_bots/bot_lists.py
@@ -5,6 +5,9 @@
from forecasting_tools.forecasting.forecast_bots.official_bots.q1_template_bot import (
Q1TemplateBot,
)
from forecasting_tools.forecasting.forecast_bots.official_bots.q1_veritas_bot import (
Q1VeritasBot,
)
from forecasting_tools.forecasting.forecast_bots.official_bots.q3_template_bot import (
Q3TemplateBot,
)
@@ -33,13 +36,12 @@ def get_all_official_bot_classes() -> list[type[ForecastBot]]:
Q3TemplateBot,
Q4TemplateBot,
Q4VeritasBot,
Q1VeritasBot,
]


def get_all_bots_for_doing_cheap_tests() -> list[ForecastBot]:
return [
TemplateBot(),
]
return [TemplateBot()]


def get_all_bot_question_type_pairs_for_cheap_tests() -> (
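With `Q1VeritasBot` registered here, anything that iterates the official-bot list picks it up automatically, e.g.:

```python
from forecasting_tools.forecasting.forecast_bots.bot_lists import (
    get_all_official_bot_classes,  # module path per the diff header
)

for bot_class in get_all_official_bot_classes():
    print(bot_class.__name__)  # ..., Q4VeritasBot, Q1VeritasBot
```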