10 changes: 7 additions & 3 deletions .env.template
@@ -1,14 +1,18 @@
PYTHONPATH=.

# Currently not being used as models, but might be in the future
PERPLEXITY_API_KEY=

# As of Jan 23rd 2025, OpenAI and Exa are the most heavily used. Most bots only use a subset of these services
OPENAI_API_KEY=
EXA_API_KEY=
PERPLEXITY_API_KEY=
DEEPSEEK_API_KEY=
ASKNEWS_CLIENT_ID=
ASKNEWS_SECRET=

# Fill this in if using the Metaculus API
METACULUS_TOKEN=

# Right now only used for free semantic similarity calculation in Deduplicator, but defaults to OpenAI if not filled in
# As of Jan 23rd 2025, only used for free semantic similarity calculation in Deduplicator, but defaults to OpenAI if not filled in
HUGGINGFACE_API_KEY=

# Disable if in Streamlit Cloud
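For local runs, these variables are typically loaded from a `.env` file copied from this template. A minimal sketch, assuming `python-dotenv` (the repo's actual loading mechanism may differ):

```python
import os

from dotenv import load_dotenv  # assumes python-dotenv is installed

# Load variables from a local .env copied from .env.template
load_dotenv()

openai_key = os.getenv("OPENAI_API_KEY")
if not openai_key:
    raise RuntimeError("OPENAI_API_KEY is required; see the ReadMe")
# HUGGINGFACE_API_KEY stays optional: per the comment above, the
# Deduplicator falls back to OpenAI for semantic similarity without it.
huggingface_key = os.getenv("HUGGINGFACE_API_KEY")
```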
2 changes: 1 addition & 1 deletion .github/workflows/run-bot-aib-tournament.yaml
@@ -37,7 +37,7 @@ jobs:
run: poetry install --no-interaction --no-root
- name: Run bot
run: |
poetry run python run_bot.py --skip-previous true --tournament 32627
poetry run python run_bot.py --skip_previous True --tournament 32627
# this reads the environment variables from the github repository.
# Store under Settings --> Secrets and variables --> Actions
# Not all these variables are required. See the ReadMe for more details.
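The flag change (`--skip-previous true` → `--skip_previous True`) implies run_bot.py now expects underscore-style flags with capitalized boolean values. A hedged sketch of argument parsing that would accept this form; the `str2bool` helper is illustrative, not necessarily the repo's actual implementation:

```python
import argparse


def str2bool(value: str) -> bool:
    # Accepts "True"/"true"/"1"/"yes" as truthy CLI values.
    return value.strip().lower() in ("true", "1", "yes")


parser = argparse.ArgumentParser()
parser.add_argument("--skip_previous", type=str2bool, default=False)
parser.add_argument("--tournament", type=str, required=True)

args = parser.parse_args(["--skip_previous", "True", "--tournament", "32627"])
assert args.skip_previous is True
```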
52 changes: 52 additions & 0 deletions .github/workflows/run-bot-quarterly-cup.yaml
@@ -0,0 +1,52 @@
name: Run Bot for Metaculus Quarterly Cup

on:
workflow_dispatch:
schedule:
- cron: "0 0 */2 * *" # runs at midnight every 2 days

# Add concurrency group to prevent parallel runs
concurrency:
group: ${{ github.workflow }}
cancel-in-progress: false

# Daily job to run the forecast bot
jobs:
run_bot:
runs-on: ubuntu-latest # determines the machine that will run the job - keep as is
steps: # sets up the steps that will be run in order
# setup repository with all necessary dependencies - keep as is
- name: Check out repository
uses: actions/checkout@v3
- uses: actions/setup-python@v4
with:
python-version: "3.11"
- name: Install poetry
uses: snok/install-poetry@v1
with:
virtualenvs-create: true
virtualenvs-in-project: true
installer-parallel: true
- name: Load cached venv
id: cached-poetry-dependencies
uses: actions/cache@v4
with:
path: .venv
key: venv-${{ runner.os }}-${{ steps.setup-python.outputs.python-version }}-${{ hashFiles('**/poetry.lock') }}
- name: Install dependencies
run: poetry install --no-interaction --no-root
- name: Run bot
run: |
poetry run python run_bot.py --skip_previous False --tournament quarterly-cup
# this reads the environment variables from the github repository.
# Store under Settings --> Secrets and variables --> Actions
# Not all these variables are required. See the ReadMe for more details.
env:
METACULUS_TOKEN: ${{ secrets.METACULUS_TOKEN }}
PERPLEXITY_API_KEY: ${{ secrets.PERPLEXITY_API_KEY }}
OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
EXA_API_KEY: ${{ secrets.EXA_API_KEY }}
ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
HUGGINGFACE_API_KEY: ${{ secrets.HUGGINGFACE_API_KEY }}
CODA_API_KEY: ${{ secrets.CODA_API_KEY }}
PYTHONPATH: .
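The `0 0 */2 * *` schedule fires at midnight on every second day of the month (1st, 3rd, 5th, ...), so consecutive runs at a month boundary can be only one day apart. A quick way to sanity-check a schedule locally, assuming the third-party `croniter` package:

```python
from datetime import datetime

from croniter import croniter  # third-party; assumed available

schedule = croniter("0 0 */2 * *", datetime(2025, 1, 23))
for _ in range(3):
    print(schedule.get_next(datetime))
# -> 2025-01-25, 2025-01-27, 2025-01-29 (all at 00:00)
```

The concurrency group makes a newly scheduled run wait for (rather than cancel) any run already in progress, since `cancel-in-progress` is false.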
@@ -37,7 +37,6 @@ async def test_predicts_test_question(
question_type: type[MetaculusQuestion],
bot: ForecastBot,
) -> None:

question = ReportOrganizer.get_live_example_question_of_type(question_type)
assert isinstance(question, question_type)
target_cost_in_usd = 0.3
@@ -48,6 +47,7 @@ async def test_predicts_test_question(
expected_report_type = (
ReportOrganizer.get_report_type_for_question_type(question_type)
)
await report.publish_report_to_metaculus()
assert isinstance(report, expected_report_type)
assert cost_manager.current_usage <= target_cost_in_usd
assert len(report.report_sections) > 1
Expand All @@ -56,7 +56,6 @@ async def test_predicts_test_question(
assert report.price_estimate is not None
assert report.minutes_taken is not None
assert report.question is not None
await report.publish_report_to_metaculus()

updated_question = MetaculusApi.get_question_by_post_id(
question.id_of_post
59 changes: 44 additions & 15 deletions code_tests/low_cost_or_live_api_tests/test_metaculus_api.py
@@ -136,14 +136,25 @@ def test_questions_returned_from_list_questions() -> None:
if ForecastingTestManager.quarterly_cup_is_not_active():
pytest.skip("Quarterly cup is not active")

ai_tournament_id = (
tournament_id = (
ForecastingTestManager.TOURNAMENT_WITH_MIXTURE_OF_OPEN_AND_NOT_OPEN
)
questions = MetaculusApi.get_all_open_questions_from_tournament(
ai_tournament_id
tournament_id
)
assert len(questions) > 0
# TODO: Add a tournament ID field and assert that the tournament is the same
assert all(question.state == QuestionState.OPEN for question in questions)

quarterly_cup_slug = "quarterly-cup"
questions = MetaculusApi.get_all_open_questions_from_tournament(
quarterly_cup_slug
)
assert len(questions) > 0
assert all(
quarterly_cup_slug in question.tournament_slugs
for question in questions
)
assert all(question.state == QuestionState.OPEN for question in questions)


def test_get_questions_from_tournament() -> None:
@@ -259,11 +270,18 @@ def test_get_benchmark_questions(num_questions_to_get: int) -> None:
ApiFilter(
close_time_gt=datetime(2024, 1, 15),
close_time_lt=datetime(2024, 1, 20),
allowed_tournament_slugs=["quarterly-cup-2024q1"],
allowed_tournaments=["quarterly-cup-2024q1"],
),
1,
False,
),
(
ApiFilter(
allowed_tournaments=[32506], # Q4 AIB Metaculus Tournament
),
None,
False,
),
(
ApiFilter(
num_forecasters_gte=50,
@@ -278,13 +296,18 @@ def test_get_benchmark_questions(num_questions_to_get: int) -> None:
],
)
async def test_get_questions_from_tournament_with_filter(
api_filter: ApiFilter, num_questions: int, randomly_sample: bool
api_filter: ApiFilter, num_questions: int | None, randomly_sample: bool
) -> None:
questions = await MetaculusApi.get_questions_matching_filter(
num_questions, api_filter, randomly_sample
api_filter,
num_questions=num_questions,
randomly_sample=randomly_sample,
)
assert_questions_match_filter(questions, api_filter)
assert len(questions) == num_questions
if num_questions is not None:
assert len(questions) == num_questions
else:
assert len(questions) > 0
assert_basic_attributes_at_percentage(questions, 0.8)
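These tests reflect a signature change: `num_questions` moves to a keyword argument and may now be `None`, meaning "return everything that matches the filter". A sketch of the new call pattern (import path assumed):

```python
import asyncio

from forecasting_tools import ApiFilter, MetaculusApi  # import path assumed


async def fetch_all_quarterly_cup_questions():
    api_filter = ApiFilter(allowed_tournaments=["quarterly-cup-2024q1"])
    # num_questions=None means "no cap": return all matching questions
    return await MetaculusApi.get_questions_matching_filter(
        api_filter,
        num_questions=None,
        randomly_sample=False,
    )


questions = asyncio.run(fetch_all_quarterly_cup_questions())
```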


@@ -306,7 +329,7 @@ async def test_question_status_filters(
allowed_statuses=[state.value for state in status_filter]
)
questions = await MetaculusApi.get_questions_matching_filter(
250, api_filter, randomly_sample=True
api_filter, num_questions=250, randomly_sample=True
)
for question in questions:
assert question.state in status_filter
@@ -318,19 +341,19 @@ async def test_question_status_filters(
"api_filter, num_questions_in_tournament, randomly_sample",
[
(
ApiFilter(allowed_tournament_slugs=["quarterly-cup-2024q1"]),
ApiFilter(allowed_tournaments=["quarterly-cup-2024q1"]),
46,
False,
),
(
ApiFilter(allowed_tournament_slugs=["quarterly-cup-2024q1"]),
ApiFilter(allowed_tournaments=["quarterly-cup-2024q1"]),
46,
True,
),
(
ApiFilter(
includes_bots_in_aggregates=False,
allowed_tournament_slugs=["aibq4"],
allowed_tournaments=["aibq4"],
),
1,
False,
@@ -346,8 +369,8 @@ async def test_fails_to_get_questions_if_filter_is_too_restrictive(

with pytest.raises(Exception):
await MetaculusApi.get_questions_matching_filter(
requested_questions,
api_filter,
num_questions=requested_questions,
randomly_sample=randomly_sample,
)

@@ -523,11 +546,17 @@ def assert_questions_match_filter( # NOSONAR
question.open_time and question.open_time < filter.open_time_lt
), f"Question {question.id_of_post} opened at {question.open_time}, expected before {filter.open_time_lt}"

if filter.allowed_tournament_slugs:
if filter.allowed_tournaments and all(
isinstance(tournament, str)
for tournament in filter.allowed_tournaments
):
# TODO: Handle when an allowed tournament is an int ID rather than a slug
# TODO: As of Jan 25, 2025 you can pass in a question series slug and get back questions.
# this should be collected in the question
assert any(
slug in filter.allowed_tournament_slugs
slug in filter.allowed_tournaments
for slug in question.tournament_slugs
), f"Question {question.id_of_post} tournaments {question.tournament_slugs} not in allowed tournaments {filter.allowed_tournament_slugs}"
), f"Question {question.id_of_post} tournaments {question.tournament_slugs} not in allowed tournaments {filter.allowed_tournaments}"

if filter.community_prediction_exists is not None:
assert filter.allowed_types == [
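The rename from `allowed_tournament_slugs` to `allowed_tournaments` widens the field to accept integer tournament IDs as well as string slugs, but per the TODOs above, matching can only be verified for slug entries today, since questions expose `tournament_slugs` and not IDs. A sketch of what the widened filter enables (values taken from the tests above; `ApiFilter` import path assumed):

```python
from forecasting_tools import ApiFilter  # import path assumed

# Slugs and numeric IDs now share one field.
slug_filter = ApiFilter(allowed_tournaments=["quarterly-cup-2024q1"])
id_filter = ApiFilter(allowed_tournaments=[32506])  # Q4 AIB tournament


def matches_slug_filter(question, allowed: list[str]) -> bool:
    # Mirrors the assert above: only slug entries are checkable for now.
    return any(slug in allowed for slug in question.tournament_slugs)
```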
2 changes: 1 addition & 1 deletion forecasting_tools/ai_models/exa_searcher.py
@@ -82,7 +82,7 @@ class ExaSearcher(
RequestLimitedModel, RetryableModel, TimeLimitedModel, IncursCost
):
REQUESTS_PER_PERIOD_LIMIT = (
4 # For rate limits see https://docs.exa.ai/reference/rate-limits
3 # For rate limits see https://docs.exa.ai/reference/rate-limits
)
REQUEST_PERIOD_IN_SECONDS = 1
TIMEOUT_TIME = 30
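The Exa request limit drops from 4 to 3 per one-second period, adding headroom under the documented rate limits. A minimal sketch of the fixed-window throttling these class constants imply (illustrative only; the real logic lives in `RequestLimitedModel`):

```python
import asyncio
import time


class SimpleRateLimiter:
    """Allow at most `limit` calls per `period` seconds (fixed window)."""

    def __init__(self, limit: int = 3, period: float = 1.0) -> None:
        self.limit = limit
        self.period = period
        self.window_start = time.monotonic()
        self.calls_in_window = 0

    async def acquire(self) -> None:
        now = time.monotonic()
        if now - self.window_start >= self.period:
            # New window: reset the counter
            self.window_start = now
            self.calls_in_window = 0
        if self.calls_in_window >= self.limit:
            # Window full: wait out the remainder, then start fresh
            await asyncio.sleep(self.period - (now - self.window_start))
            self.window_start = time.monotonic()
            self.calls_in_window = 0
        self.calls_in_window += 1


limiter = SimpleRateLimiter(limit=3, period=1.0)  # mirrors the new constants
```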
6 changes: 4 additions & 2 deletions forecasting_tools/ai_models/model_archetypes/general_llm.py
@@ -88,6 +88,7 @@ async def _invoke_with_request_cost_time_and_token_limits_and_retry(
self, *args, **kwargs
) -> Any:
logger.debug(f"Invoking model with args: {args} and kwargs: {kwargs}")
MonetaryCostManager.raise_error_if_limit_would_be_reached()
direct_call_response = await self._mockable_direct_call_to_model(
*args, **kwargs
)
@@ -97,6 +98,8 @@ async def _invoke_with_request_cost_time_and_token_limits_and_retry(
else direct_call_response
)
logger.debug(f"Model responded with: {response_to_log}...")
cost = direct_call_response.cost
MonetaryCostManager.increase_current_usage_in_parent_managers(cost)
return direct_call_response

@classmethod
@@ -122,7 +125,7 @@ async def _mockable_direct_call_to_model(
choices = typeguard.check_type(choices, list[Choices])
answer = choices[0].message.content
assert isinstance(answer, str)
usage = response.usage
usage = response.usage # type: ignore
assert isinstance(usage, Usage)
prompt_tokens = usage.prompt_tokens
completion_tokens = usage.completion_tokens
Expand All @@ -133,7 +136,6 @@ async def _mockable_direct_call_to_model(
] # If this has problems, consider using the budgetmanager class
if cost is None:
cost = 0
MonetaryCostManager.increase_current_usage_in_parent_managers(cost)

return TextTokenCostResponse(
data=answer,
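This moves cost accounting out of `_mockable_direct_call_to_model` and into the retry wrapper, and adds a pre-call budget check so a request that would exceed the limit fails before any money is spent. A hedged sketch of the resulting control flow (the manager method names are from the diff; everything else is simplified):

```python
async def _invoke_with_limits_and_retry(self, *args, **kwargs):
    # Fail fast if the cost limit is already reached, before calling out.
    MonetaryCostManager.raise_error_if_limit_would_be_reached()

    response = await self._mockable_direct_call_to_model(*args, **kwargs)

    # Accounting now lives in the wrapper rather than inside the direct
    # call, so usage is recorded uniformly whether the underlying call is
    # real or mocked (both return a response carrying a `cost`).
    MonetaryCostManager.increase_current_usage_in_parent_managers(response.cost)
    return response
```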
10 changes: 4 additions & 6 deletions forecasting_tools/ai_models/perplexity.py
@@ -1,18 +1,16 @@
from typing import Final

from forecasting_tools.ai_models.model_archetypes.perplexity_text_model import (
PerplexityTextModel,
from forecasting_tools.ai_models.model_archetypes.general_llm import (
GeneralTextToTextLlm,
)


class Perplexity(PerplexityTextModel):
MODEL_NAME: Final[str] = "llama-3.1-sonar-huge-128k-online"
class Perplexity(GeneralTextToTextLlm):
MODEL_NAME: Final[str] = "perplexity/sonar-pro"
REQUESTS_PER_PERIOD_LIMIT: Final[int] = (
40 # Technically 50, but giving wiggle room
)
REQUEST_PERIOD_IN_SECONDS: Final[int] = 60
TIMEOUT_TIME: Final[int] = 120
TOKENS_PER_PERIOD_LIMIT: Final[int] = 2000000
TOKEN_PERIOD_IN_SECONDS: Final[int] = 60
PRICE_PER_TOKEN: Final[float] = 0.000005
PRICE_PER_REQUEST: Final[float] = 0.005
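Perplexity now rides on the shared `GeneralTextToTextLlm` archetype with a litellm-style `provider/model` name instead of the bespoke `PerplexityTextModel`; the per-token and per-request price constants disappear, presumably because the general path derives cost from the response itself. A hedged usage sketch (`invoke` is an assumed entry point mirroring the repo's other model classes):

```python
import asyncio

from forecasting_tools.ai_models.perplexity import Perplexity  # path per diff

model = Perplexity()  # MODEL_NAME resolves to "perplexity/sonar-pro"
# `invoke` is assumed; check the GeneralTextToTextLlm interface for the
# actual public method.
answer = asyncio.run(model.invoke("Summarize this week's forecasting news."))
print(answer)
```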
8 changes: 5 additions & 3 deletions forecasting_tools/forecasting/forecast_bots/bot_lists.py
@@ -5,6 +5,9 @@
from forecasting_tools.forecasting.forecast_bots.official_bots.q1_template_bot import (
Q1TemplateBot,
)
from forecasting_tools.forecasting.forecast_bots.official_bots.q1_veritas_bot import (
Q1VeritasBot,
)
from forecasting_tools.forecasting.forecast_bots.official_bots.q3_template_bot import (
Q3TemplateBot,
)
@@ -33,13 +36,12 @@ def get_all_official_bot_classes() -> list[type[ForecastBot]]:
Q3TemplateBot,
Q4TemplateBot,
Q4VeritasBot,
Q1VeritasBot,
]


def get_all_bots_for_doing_cheap_tests() -> list[ForecastBot]:
return [
TemplateBot(),
]
return [TemplateBot()]


def get_all_bot_question_type_pairs_for_cheap_tests() -> (
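With `Q1VeritasBot` registered here, anything that iterates the official-bot list picks it up automatically, e.g.:

```python
from forecasting_tools.forecasting.forecast_bots.bot_lists import (
    get_all_official_bot_classes,  # module path per the diff header
)

for bot_class in get_all_official_bot_classes():
    print(bot_class.__name__)  # ..., Q4VeritasBot, Q1VeritasBot
```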