4 changes: 3 additions & 1 deletion .github/workflows/test_llm.yml
@@ -14,6 +14,7 @@ jobs:
     strategy:
       matrix:
         python-version: ["'3.13'"]
+        model: ["gpt-4o-mini"]
     steps:
       - uses: actions/checkout@v4

@@ -34,7 +35,8 @@ jobs:

       - name: Run LLM integration tests
         env:
+          EFFECTFUL_LLM_MODEL: ${{ matrix.model }}
           OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
           ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
         run: |
-          uv run pytest tests/test_handlers_llm_provider.py -v --tb=short
+          uv run pytest tests/test_handlers_llm_provider.py tests/test_handlers_llm_tool_calling_poem.py tests/test_handlers_llm_tool_calling_book.py -v --tb=short
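
The matrix `model` value reaches the test process through `EFFECTFUL_LLM_MODEL`, and both provider keys stay in `env` so the same job can target OpenAI or Anthropic models as the matrix grows. For reference, a minimal sketch (not part of this PR) of the litellm check that `tests/conftest.py` below builds on:

# litellm.validate_environment maps a model name to the API key(s) it
# needs and reports whether they are present in the environment.
import litellm

info = litellm.validate_environment(model="gpt-4o-mini")
print(info["keys_in_environment"])  # True when OPENAI_API_KEY is set
print(info["missing_keys"])         # e.g. ["OPENAI_API_KEY"] when it is not
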
19 changes: 19 additions & 0 deletions tests/conftest.py
@@ -1,5 +1,24 @@
 import os

+import litellm
+import pytest
+
+EFFECTFUL_LLM_MODEL = os.environ.get("EFFECTFUL_LLM_MODEL", "gpt-4o-mini")
+
+_HAS_LLM_API_KEY = litellm.validate_environment(model=EFFECTFUL_LLM_MODEL)[
+    "keys_in_environment"
+]
+
+requires_llm = pytest.mark.skipif(
+    not _HAS_LLM_API_KEY,
+    reason=f"No API key configured for model {EFFECTFUL_LLM_MODEL}",
+)
+
+requires_vision = pytest.mark.skipif(
+    not litellm.supports_vision(model=EFFECTFUL_LLM_MODEL),
+    reason=f"Model {EFFECTFUL_LLM_MODEL} does not support vision",
+)
+
 UNIMPLEMENTED_SUBSTRINGS = [
     "infer.JitTrace_ELBO",
     "the event_dim arg",
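The two markers above replace the per-file `requires_openai` guard that test modules previously imported from one another. A hypothetical test module (illustrative names, not part of this PR) would consume them like so:

# Hypothetical usage in a test module; names here are illustrative.
from tests.conftest import EFFECTFUL_LLM_MODEL, requires_llm, requires_vision


@requires_llm
def test_completion_smoke():
    # Runs only when an API key for the configured model is available;
    # the model defaults to "gpt-4o-mini" when the env var is unset.
    assert EFFECTFUL_LLM_MODEL


@requires_vision
def test_image_input_smoke():
    # Skipped automatically when the configured model lacks vision support.
    ...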

This file was deleted.

This file was deleted.

This file was deleted.

16 changes: 7 additions & 9 deletions tests/test_handlers_llm_encoding.py
@@ -28,9 +28,7 @@
 from effectful.internals.unification import nested_type
 from effectful.ops.semantics import handler
 from effectful.ops.types import Operation, Term
-from tests.test_handlers_llm_tool_calling_book import requires_openai
-
-CHEAP_MODEL = "gpt-4o-mini"
+from tests.conftest import EFFECTFUL_LLM_MODEL, requires_llm

 # ---------------------------------------------------------------------------
 # Module-level type definitions
@@ -744,14 +742,14 @@ def _encode_tool_spec(tool: Tool[..., Any]) -> dict[str, Any]:
raise TypeError(f"Unexpected encoded tool spec type: {type(tool_spec_obj)}")


-@requires_openai
+@requires_llm
@pytest.mark.parametrize("ty,_value,ctx", PROVIDER_CASES)
def test_litellm_completion_accepts_encodable_response_model_for_supported_types(
ty: Any, _value: Any, ctx: Mapping[str, Any] | None
) -> None:
enc = Encodable.define(ty, ctx)
kwargs: dict[str, Any] = {
"model": CHEAP_MODEL,
"model": EFFECTFUL_LLM_MODEL,
"messages": [
{
"role": "user",
@@ -777,7 +775,7 @@ def test_litellm_completion_accepts_encodable_response_model_for_supported_types
pydantic.TypeAdapter(enc.base).validate_python(decoded)


-@requires_openai
+@requires_llm
@pytest.mark.parametrize("ty,_value,ctx", PROVIDER_CASES)
def test_litellm_completion_accepts_tool_with_type_as_param(
ty: Any, _value: Any, ctx: Mapping[str, Any] | None
@@ -793,7 +791,7 @@ def _fn(value):

tool: Tool[..., Any] = Tool.define(_fn)
response = litellm.completion(
-        model=CHEAP_MODEL,
+        model=EFFECTFUL_LLM_MODEL,
messages=[{"role": "user", "content": "Return hello, do NOT call any tools."}],
tools=[_encode_tool_spec(tool)],
tool_choice="none",
@@ -802,7 +800,7 @@ def _fn(value):
assert response is not None


-@requires_openai
+@requires_llm
@pytest.mark.parametrize("ty,_value,ctx", PROVIDER_CASES)
def test_litellm_completion_accepts_tool_with_type_as_return(
ty: Any, _value: Any, ctx: Mapping[str, Any] | None
@@ -818,7 +816,7 @@ def _fn():

tool: Tool[..., Any] = Tool.define(_fn)
response = litellm.completion(
-        model=CHEAP_MODEL,
+        model=EFFECTFUL_LLM_MODEL,
messages=[{"role": "user", "content": "Return hello, do NOT call any tools."}],
tools=[_encode_tool_spec(tool)],
tool_choice="none",
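These tests feed `_encode_tool_spec(tool)` directly to `litellm.completion`, so the encoder must produce the OpenAI-style function-tool schema that litellm forwards to providers. A sketch of that expected shape, as an assumption for illustration (the real encoder lives in the test module above):

# Assumed shape of an encoded tool spec: the OpenAI function-calling
# format that litellm accepts in its `tools` parameter.
tool_spec = {
    "type": "function",
    "function": {
        "name": "_fn",
        "description": "Return a hello value.",
        "parameters": {  # JSON Schema describing the tool's arguments
            "type": "object",
            "properties": {"value": {"type": "string"}},
            "required": ["value"],
        },
    },
}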