Skip to content

Commit eaca9d9

Browse files
SL-Marclaude
and committed
Fix Ollama context window: query model num_ctx instead of defaulting to 2048
OllamaProvider now queries the model's actual context_length from /api/show at init time and passes num_ctx in every request. Without this, Ollama defaulted to 2048 tokens, silently truncating ~80% of paper text before the summarization LLM could see it. Both mistral and qwen2.5-coder:14b support 32768. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
1 parent 782de63 commit eaca9d9

1 file changed

Lines changed: 27 additions & 2 deletions

File tree

quantcoder/llm/providers.py

Lines changed: 27 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -62,7 +62,31 @@ def __init__(
6262
self.base_url = self.base_url[:-3]
6363
self.timeout = timeout
6464
self.logger = logging.getLogger(f"quantcoder.{self.__class__.__name__}")
65-
self.logger.info(f"Initialized OllamaProvider: {self.base_url}, model={self.model}")
65+
self._num_ctx = self._query_context_length()
66+
self.logger.info(
67+
f"Initialized OllamaProvider: {self.base_url}, model={self.model}, "
68+
f"num_ctx={self._num_ctx}"
69+
)
70+
71+
def _query_context_length(self) -> int:
72+
"""Query model's context window from Ollama, default 32768."""
73+
import urllib.request
74+
import json as _json
75+
try:
76+
req = urllib.request.Request(
77+
f"{self.base_url}/api/show",
78+
data=_json.dumps({"name": self.model}).encode(),
79+
headers={"Content-Type": "application/json"},
80+
method="POST",
81+
)
82+
with urllib.request.urlopen(req, timeout=5) as resp:
83+
data = _json.loads(resp.read())
84+
for key, val in data.get("model_info", {}).items():
85+
if "context_length" in key:
86+
return int(val)
87+
except Exception:
88+
pass
89+
return 32768
6690

6791
async def chat(
6892
self,
@@ -81,7 +105,8 @@ async def chat(
81105
"stream": False,
82106
"options": {
83107
"temperature": temperature,
84-
"num_predict": max_tokens
108+
"num_predict": max_tokens,
109+
"num_ctx": self._num_ctx,
85110
}
86111
}
87112

0 commit comments

Comments
 (0)