Metaculus · CodexVeritas · Feb 20, 2025 · Feb 18, 2025 · Feb 18, 2025 · Feb 18, 2025
diff --git a/README.ipynb b/README.ipynb
@@ -2,19 +2,19 @@
  "cells": [
   {
    "cell_type": "code",
-   "execution_count": 4,
+   "execution_count": 1,
    "metadata": {},
    "outputs": [
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
       "[NbConvertApp] Converting notebook README.ipynb to markdown\n",
-      "[NbConvertApp] Writing 41441 bytes to README.md\n",
+      "[NbConvertApp] Writing 41186 bytes to README.md\n",
       "┌──────────┬────────────┬───────────┐\n",
       "│ \u001b[1mlast_day\u001b[0m │ \u001b[1mlast_month\u001b[0m │ \u001b[1mlast_week\u001b[0m │\n",
       "├──────────┼────────────┼───────────┤\n",
-      "│      154 │      7,586 │     1,243 │\n",
+      "│      147 │      7,522 │     1,376 │\n",
       "└──────────┴────────────┴───────────┘\n",
       "\n"
      ]
@@ -170,7 +170,6 @@
     "    background_info=\"...\", # Or 'None'\n",
     "    resolution_criteria=\"...\", # Or 'None'\n",
     "    fine_print=\"...\", # Or 'None'\n",
-    "    id_of_post=0, # The ID and state only matters if using Metaculus API calls so feel free to set it to 0\n",
     ")\n",
     "\n",
     "reports = await bot.forecast_questions([question1, question2])\n",
@@ -659,7 +658,6 @@
     "    background_info=\"...\", # Or 'None'\n",
     "    resolution_criteria=\"...\", # Or 'None'\n",
     "    fine_print=\"...\", # Or 'None'\n",
-    "    id_of_post=0, # The ID and state only matters if using Metaculus API calls so feel free to set it to 0\n",
     ")\n",
     "\n",
     "# Find key factors\n",

diff --git a/README.md b/README.md
@@ -98,7 +98,6 @@ question2 = BinaryQuestion(
     background_info="...", # Or 'None'
     resolution_criteria="...", # Or 'None'
     fine_print="...", # Or 'None'
-    id_of_post=0, # The ID and state only matters if using Metaculus API calls so feel free to set it to 0
 )
 
 reports = await bot.forecast_questions([question1, question2])
@@ -479,7 +478,6 @@ question = BinaryQuestion(
     background_info="...", # Or 'None'
     resolution_criteria="...", # Or 'None'
     fine_print="...", # Or 'None'
-    id_of_post=0, # The ID and state only matters if using Metaculus API calls so feel free to set it to 0
 )
 
 # Find key factors
@@ -623,7 +621,6 @@ The `GeneralLlm` class is a wrapper around around litellm's acompletion function
 
 
 ```python
-from forecasting_tools import GeneralLlm
 
 result = await GeneralLlm(model="gpt-4o").invoke(prompt)
 result = await GeneralLlm(model="claude-3-5-sonnet-20241022").invoke(prompt)

diff --git a/code_tests/expensive_tests__run_individually/test_ex_base_rate_responder.py b/code_tests/expensive_tests__run_individually/test_ex_base_rate_responder.py
@@ -1,7 +1,7 @@
 import asyncio
 from datetime import datetime
 
-from forecasting_tools.forecasting.sub_question_researchers.base_rate_researcher import (
+from forecasting_tools.research_agents.base_rate_researcher import (
     BaseRateReport,
     BaseRateResearcher,
     DenominatorOption,

diff --git a/code_tests/expensive_tests__run_individually/test_ex_key_factors.py b/code_tests/expensive_tests__run_individually/test_ex_key_factors.py
@@ -5,8 +5,8 @@
 from forecasting_tools.ai_models.resource_managers.monetary_cost_manager import (
     MonetaryCostManager,
 )
-from forecasting_tools.forecasting.helpers.metaculus_api import MetaculusApi
-from forecasting_tools.forecasting.sub_question_researchers.key_factors_researcher import (
+from forecasting_tools.forecast_helpers.metaculus_api import MetaculusApi
+from forecasting_tools.research_agents.key_factors_researcher import (
     KeyFactorsResearcher,
     ScoredKeyFactor,
 )

diff --git a/code_tests/expensive_tests__run_individually/test_ex_niche_list_researcher.py b/code_tests/expensive_tests__run_individually/test_ex_niche_list_researcher.py
@@ -8,7 +8,7 @@
 from forecasting_tools.ai_models.resource_managers.monetary_cost_manager import (
     MonetaryCostManager,
 )
-from forecasting_tools.forecasting.sub_question_researchers.niche_list_researcher import (
+from forecasting_tools.research_agents.niche_list_researcher import (
     FactCheckedItem,
     NicheListResearcher,
 )
@@ -118,13 +118,21 @@ def test_large_lists_fail(things_to_generate: str) -> None:
         ),
         (
             "Times Apple was successfully sued by another entity for patent violations before Oct 16 2024",
+            # Consider asking instead whether Apple got money in the case, as 'success' is not super clear, and how do you define the difference between a string of court cases (VirnetX) versus not?
             [
                 "Creative Technology v. Apple, Inc. (menu structure)",  # Settled in 2006 https://en.wikipedia.org/wiki/Litigation_involving_Apple_Inc.#:~:text=In%20August%202006%2C%20Apple%20and%20Creative%20settled%20the%20suit%20with%20Apple%20agreeing%20to%20pay%20Creative%20%24100%20million%20USD%20for%20the%20right%20to%20implement%20Creative%27s%20method%20of%20sorting%20songs%20on%20the%20iPod
-                "Apple vs. Masimo",  # Ban on some watch features https://time.com/6692718/apple-watch-masimo-alivecor-patent-antitrust-legal-explainer/#:~:text=the%20ITC%20imposed,welcomed%20by%20Masimo.
+                "Apple vs. Masimo (watch features)",  # Ban on some watch features https://time.com/6692718/apple-watch-masimo-alivecor-patent-antitrust-legal-explainer/#:~:text=the%20ITC%20imposed,welcomed%20by%20Masimo.
                 "Typhoon Touch Technologies (touch screen)",  # Settled in 2010 https://en.wikipedia.org/wiki/Litigation_involving_Apple_Inc.#:~:text=In%202010%2C%20Apple%20settled%20with%20Typhoon%20for%20an%20undisclosed%20sum%20and%20was%20then%20dismissed%20from%20the%20litigation%20as%20of%20September%202010
                 "Nokia v. Apple (wireless, iPhone)",  # Settled in 2007 https://en.wikipedia.org/wiki/Litigation_involving_Apple_Inc.#:~:text=For%20an%20undisclosed%20amount%20of%20cash%20and%20future%20ongoing%20iPhone%20royalties%20to%20be%20paid%20by%20Apple%2C%20Nokia%20agreed%20to%20settle%2C%20with%20Apple%27s%20royalty%20payments%20retroactively%20back%2Dpayable%20to%20the%20iPhone%27s%20introduction%20in%202007%2C%20but%20with%20no%20broad%20cross%2Dlicensing%20agreement%20made%20between%20the%20companies
+                "Science Applications International Corporation (Virnetx) vs Apple (2014)",  # VirnetX won in 2014 https://caselaw.findlaw.com/court/us-federal-circuit/1678303.html
+                "Apple vs. VirnetX (2012)",
+                "Apple vs. VirnetX (2016)",  # Did this one split into the 2017 and 2018 ones?
+                "Apple vs. VirnetX (2017)",
+                "Apple vs. VirnetX (2018)",
                 "Apple vs. VirnetX (2019)",
-                # VirnetX got money https://en.wikipedia.org/wiki/Litigation_involving_Apple_Inc.#:~:text=The%20first%20case,verdict%20against%20it
+                "Apple vs. VirnetX (2020)",  # Was this just a final trial of a previous case?
+                # Court cases for 2012, 2016, 2017, 2018, 2020: https://caldwellcc.com/case/virnetx-inc-v-apple-inc/
+                # VirnetX got money in 2019 https://en.wikipedia.org/wiki/Litigation_involving_Apple_Inc.#:~:text=The%20first%20case,verdict%20against%20it
                 # A later 2020 VirnetX case was unsuccessful https://www.macrumors.com/2024/02/20/apple-wins-virnetx-503-million/#:~:text=After%20Apple%20appealed%20the%20initial,on%20or%20license%20its%20patents.
                 "Qualcomm vs. Apple (2017-2019): Power management/Download Speed",
                 # Paid 31million https://www.qualcomm.com/news/releases/2019/03/qualcomm-wins-patent-infringement-case-against-apple-san-diego
@@ -142,9 +150,9 @@ def test_large_lists_fail(things_to_generate: str) -> None:
                 # One claim was revered I think https://law.justia.com/cases/federal/appellate-courts/cafc/17-2102/17-2102-2018-08-16.html#:~:text=The%20court%20reversed%20in%20part%3B%20Core%E2%80%99s%20theory%20of%20infringement%20is%20inadequate%20to%20support%20a%20judgment%20on%20claim%2019.
                 # Might be same case as Nokia v Apple https://www.patentlyapple.com/2018/08/a-federal-appeals-court-has-ruled-that-apple-didnt-infringe-one-of-two-patents-in-case-brought-on-by-core-wireless-licensing.html#:~:text=The%20verdict%20capped%20a%20trial%20that%20kicked%20off%20on%20Dec.%205%20centering%20on%20two%20patents%20that%20were%20Originaly%20owned%20by%20Nokia
                 "Apple vs. Caltech (2016-2020)",  # Paid $838m https://appleinsider.com/articles/23/08/11/caltech-may-finally-settle-848-million-patent-case-against-apple?utm_medium=rss#:~:text=Caltech%20began%20its%20legal%20battle,amounts%20they%20had%20to%20pay.
+                "Ironworks Patents LLC vs Apple Inc (2010)",  # Paid $10 million. See filing 749 and filing 8 https://dockets.justia.com/docket/delaware/dedce/1:2010cv00258/43925
+                "WARF vs Apple (2015)",  # Paid $234 million https://news.wisc.edu/warf-wins-patent-infringement-lawsuit-against-apple/
                 # Invalid
-                # Secondar QualComm case
-                # Is this a different one? It was settled... I think Apple initiated? # https://www.reuters.com/technology/apple-loses-second-bid-challenge-qualcomm-patents-us-supreme-court-2022-10-03/#:~:text=The%20companies%20settled%20their%20underlying%20fight%20in%202019%2C%20signing%20an%20agreement%20worth%20billions%20of%20dollars%20that%20let%20Apple%20continue%20using%20Qualcomm%20chips%20in%20iPhones
                 # "Brazilian SEP Litigation",
                 # There was a case ruled in Brazil in Ericcson's favor, but this was part of the larger Apple v Ericcson dispute
                 # https://www.lickslegal.com/articles/ericsson-apple-settlement-came-hot-on-the-heels-of-landmark-brazilian-ruling-2#:~:text=In%20one%20of,in%20Ericsson%E2%80%99s%20favour.
@@ -156,7 +164,9 @@ def test_large_lists_fail(things_to_generate: str) -> None:
                 # https://natlawreview.com/article/some-touch-needed-federal-circuit-partially-confirms-ptabs-view-analogous-art#:~:text=After%20Corephotonics%20sued,with%20other%20references).
                 # https://cafc.uscourts.gov/opinions-orders/22-1350.OPINION.9-11-2023_2188207.pdf
                 # https://cafc.uscourts.gov/opinions-orders/22-1340.OPINION.10-16-2023_2205991.pdf
-                # Qualcomm vs. Apple : Modems - Apple Sued Qualcomm. They settled  https://www.inquartik.com/blog/case-intel-apple-qualcomm/#:~:text=Apple%20initially%20sued%20Qualcomm%20for%20%241%20billion%2C%20in%20China%2C%20it%20was%20for%20%24145%20million.%20During%20the%20period%20of%20legal%20action%2C%20Apple%20used%20Intel%E2%80%99s%20modems%20to%20build%20the%20iPhone%20XS.
+                # "Second QualComm case"
+                # Is this a different one? It was settled... I think Apple initiated? # https://www.reuters.com/technology/apple-loses-second-bid-challenge-qualcomm-patents-us-supreme-court-2022-10-03/#:~:text=The%20companies%20settled%20their%20underlying%20fight%20in%202019%2C%20signing%20an%20agreement%20worth%20billions%20of%20dollars%20that%20let%20Apple%20continue%20using%20Qualcomm%20chips%20in%20iPhones
+                # "Qualcomm vs. Apple : Modems - Apple Sued Qualcomm" # They settled  https://www.inquartik.com/blog/case-intel-apple-qualcomm/#:~:text=Apple%20initially%20sued%20Qualcomm%20for%20%241%20billion%2C%20in%20China%2C%20it%20was%20for%20%24145%20million.%20During%20the%20period%20of%20legal%20action%2C%20Apple%20used%20Intel%E2%80%99s%20modems%20to%20build%20the%20iPhone%20XS.
                 # List of other cases (valid ones have been processed) https://en.wikipedia.org/w/index.php?title=Litigation_involving_Apple_Inc.
             ],
         ),

diff --git a/code_tests/expensive_tests__run_individually/test_ex_question_responders.py b/code_tests/expensive_tests__run_individually/test_ex_question_responders.py
@@ -5,18 +5,16 @@
 
 from forecasting_tools.ai_models.ai_utils.ai_misc import clean_indents
 from forecasting_tools.ai_models.deprecated_model_classes.gpt4o import Gpt4o
-from forecasting_tools.forecasting.sub_question_researchers.base_rate_researcher import (
+from forecasting_tools.research_agents.base_rate_researcher import (
     BaseRateResearcher,
 )
-from forecasting_tools.forecasting.sub_question_researchers.general_researcher import (
+from forecasting_tools.research_agents.general_researcher import (
     GeneralResearcher,
 )
-from forecasting_tools.forecasting.sub_question_researchers.question_responder import (
+from forecasting_tools.research_agents.question_responder import (
     QuestionResponder,
 )
-from forecasting_tools.forecasting.sub_question_researchers.question_router import (
-    QuestionRouter,
-)
+from forecasting_tools.research_agents.question_router import QuestionRouter
 
 
 #################################### HELPERS ####################################

diff --git a/code_tests/expensive_tests__run_individually/test_ex_smart_searcher.py b/code_tests/expensive_tests__run_individually/test_ex_smart_searcher.py
@@ -3,7 +3,7 @@
 from code_tests.utilities_for_tests.coroutine_testing import (
     assert_coroutines_run_under_x_times_duration_of_benchmark,
 )
-from forecasting_tools.forecasting.helpers.smart_searcher import SmartSearcher
+from forecasting_tools.forecast_helpers.smart_searcher import SmartSearcher
 
 logger = logging.getLogger(__name__)
 

diff --git a/code_tests/low_cost_or_live_api_tests/test_ai_models/test_general_llm.py b/code_tests/low_cost_or_live_api_tests/test_ai_models/test_general_llm.py
@@ -31,3 +31,12 @@ def test_timeout_works() -> None:
     model = GeneralLlm(model="gpt-4o-mini", timeout=50)
     response = asyncio.run(model.invoke(model_input))
     assert response is not None, "Response is None"
+
+
+def test_litellm_params_work() -> None:
+    # Make sure it doesn't raise an exception
+    GeneralLlm(model="gpt-4o", temperature=0.1, max_tokens=100)
+
+    # Make sure it raises an exception if a non-litellm param is passed
+    with pytest.raises(Exception):
+        GeneralLlm(model="gpt-4o", temperature=0.1, non_litellm_param=100)
diff --git a/code_tests/low_cost_or_live_api_tests/test_asknews.py b/code_tests/low_cost_or_live_api_tests/test_asknews.py
@@ -4,9 +4,7 @@
 
 import pytest
 
-from forecasting_tools.forecasting.helpers.asknews_searcher import (
-    AskNewsSearcher,
-)
+from forecasting_tools.forecast_helpers.asknews_searcher import AskNewsSearcher
 
 logger = logging.getLogger(__name__)
 

diff --git a/code_tests/low_cost_or_live_api_tests/test_base_rate_responder.py b/code_tests/low_cost_or_live_api_tests/test_base_rate_responder.py
@@ -1,6 +1,6 @@
 import pytest
 
-from forecasting_tools.forecasting.sub_question_researchers.base_rate_researcher import (
+from forecasting_tools.research_agents.base_rate_researcher import (
     BaseRateResearcher,
 )
 

diff --git a/code_tests/low_cost_or_live_api_tests/test_benchmarker.py b/code_tests/low_cost_or_live_api_tests/test_benchmarker.py
@@ -7,13 +7,9 @@
 from code_tests.unit_tests.test_forecasting.forecasting_test_manager import (
     ForecastingTestManager,
 )
-from forecasting_tools.forecasting.forecast_bots.template_bot import (
-    TemplateBot,
-)
-from forecasting_tools.forecasting.helpers.benchmarker import Benchmarker
-from forecasting_tools.forecasting.questions_and_reports.benchmark_for_bot import (
-    BenchmarkForBot,
-)
+from forecasting_tools.data_models.benchmark_for_bot import BenchmarkForBot
+from forecasting_tools.forecast_bots.template_bot import TemplateBot
+from forecasting_tools.forecast_helpers.benchmarker import Benchmarker
 from forecasting_tools.util import file_manipulation
 
 

diff --git a/code_tests/low_cost_or_live_api_tests/test_deduplicator.py b/code_tests/low_cost_or_live_api_tests/test_deduplicator.py
@@ -2,9 +2,7 @@
 
 import pytest
 
-from forecasting_tools.forecasting.sub_question_researchers.deduplicator import (
-    Deduplicator,
-)
+from forecasting_tools.research_agents.deduplicator import Deduplicator
 
 logger = logging.getLogger(__name__)
 

diff --git a/code_tests/low_cost_or_live_api_tests/test_forecast_database_manager.py b/code_tests/low_cost_or_live_api_tests/test_forecast_database_manager.py
@@ -1,16 +1,12 @@
 import asyncio
 
-from forecasting_tools.forecasting.helpers.forecast_database_manager import (
+from forecasting_tools.data_models.data_organizer import DataOrganizer
+from forecasting_tools.data_models.forecast_report import ForecastReport
+from forecasting_tools.forecast_helpers.forecast_database_manager import (
     ForecastDatabaseManager,
     ForecastRunType,
 )
-from forecasting_tools.forecasting.questions_and_reports.data_organizer import (
-    DataOrganizer,
-)
-from forecasting_tools.forecasting.questions_and_reports.forecast_report import (
-    ForecastReport,
-)
-from forecasting_tools.forecasting.sub_question_researchers.base_rate_researcher import (
+from forecasting_tools.research_agents.base_rate_researcher import (
     BaseRateReport,
 )
 from forecasting_tools.util.coda_utils import CodaRow

diff --git a/code_tests/low_cost_or_live_api_tests/test_forecasting_bots.py b/code_tests/low_cost_or_live_api_tests/test_forecasting_bots.py
@@ -10,22 +10,14 @@
 from forecasting_tools.ai_models.resource_managers.monetary_cost_manager import (
     MonetaryCostManager,
 )
-from forecasting_tools.forecasting.forecast_bots.bot_lists import (
+from forecasting_tools.data_models.data_organizer import DataOrganizer
+from forecasting_tools.data_models.questions import MetaculusQuestion
+from forecasting_tools.forecast_bots.bot_lists import (
     get_all_bot_question_type_pairs_for_cheap_tests,
 )
-from forecasting_tools.forecasting.forecast_bots.forecast_bot import (
-    ForecastBot,
-)
-from forecasting_tools.forecasting.forecast_bots.template_bot import (
-    TemplateBot,
-)
-from forecasting_tools.forecasting.helpers.metaculus_api import MetaculusApi
-from forecasting_tools.forecasting.questions_and_reports.data_organizer import (
-    DataOrganizer,
-)
-from forecasting_tools.forecasting.questions_and_reports.questions import (
-    MetaculusQuestion,
-)
+from forecasting_tools.forecast_bots.forecast_bot import ForecastBot
+from forecasting_tools.forecast_bots.template_bot import TemplateBot
+from forecasting_tools.forecast_helpers.metaculus_api import MetaculusApi
 
 logger = logging.getLogger(__name__)
 
@@ -56,6 +48,7 @@ async def test_predicts_test_question(
     assert report.price_estimate is not None
     assert report.minutes_taken is not None
     assert report.question is not None
+    assert question.id_of_post is not None
 
     updated_question = MetaculusApi.get_question_by_post_id(
         question.id_of_post
@@ -88,7 +81,7 @@ async def test_no_reports_when_questions_already_forecasted(
     bot_type = TemplateBot
     bot = bot_type(skip_previously_forecasted_questions=True)
     ForecastingTestManager.mock_forecast_bot_run_forecast(bot_type, mocker)
-    questions = [ForecastingTestManager.get_fake_binary_questions()]
+    questions = [ForecastingTestManager.get_fake_binary_question()]
     questions = typeguard.check_type(questions, list[MetaculusQuestion])
 
     for question in questions:

diff --git a/code_tests/low_cost_or_live_api_tests/test_metaculus_api.py b/code_tests/low_cost_or_live_api_tests/test_metaculus_api.py
@@ -7,21 +7,19 @@
 from code_tests.unit_tests.test_forecasting.forecasting_test_manager import (
     ForecastingTestManager,
 )
-from forecasting_tools.forecasting.helpers.metaculus_api import (
-    ApiFilter,
-    MetaculusApi,
-)
-from forecasting_tools.forecasting.questions_and_reports.data_organizer import (
-    DataOrganizer,
-)
-from forecasting_tools.forecasting.questions_and_reports.questions import (
+from forecasting_tools.data_models.data_organizer import DataOrganizer
+from forecasting_tools.data_models.questions import (
     BinaryQuestion,
     DateQuestion,
     MetaculusQuestion,
     MultipleChoiceQuestion,
     NumericQuestion,
     QuestionState,
 )
+from forecasting_tools.forecast_helpers.metaculus_api import (
+    ApiFilter,
+    MetaculusApi,
+)
 
 logger = logging.getLogger(__name__)
 
@@ -121,7 +119,7 @@ def test_post_binary_prediction_on_question() -> None:
 
 
 def test_post_binary_prediction_error_when_out_of_range() -> None:
-    question = ForecastingTestManager.get_fake_binary_questions()
+    question = ForecastingTestManager.get_fake_binary_question()
     question_id = question.id_of_post
     with pytest.raises(ValueError):
         MetaculusApi.post_binary_question_prediction(question_id, 0)

diff --git a/code_tests/low_cost_or_live_api_tests/test_smart_searcher.py b/code_tests/low_cost_or_live_api_tests/test_smart_searcher.py
@@ -4,7 +4,7 @@
 import pytest
 
 from forecasting_tools.ai_models.general_llm import GeneralLlm
-from forecasting_tools.forecasting.helpers.smart_searcher import SmartSearcher
+from forecasting_tools.forecast_helpers.smart_searcher import SmartSearcher
 
 logger = logging.getLogger(__name__)