Skip to content

Commit 0ec2d7f

Browse files
committed
Fixed false-positive switching in intent-switch detection
1 parent e8d2d8d commit 0ec2d7f

5 files changed

Lines changed: 57 additions & 44 deletions

File tree

DSL/Ruuter.private/rag-search/POST/ckb/agency_data_import.yml

Lines changed: 0 additions & 33 deletions
This file was deleted.

src/tool_classifier/api_semantic_searcher.py

Lines changed: 22 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -210,17 +210,21 @@ async def search(
210210
connection_id: Optional[str] = None,
211211
top_k: int = API_TOOL_SEARCH_TOP_K,
212212
precomputed_embedding: Optional[List[float]] = None,
213+
min_cosine_override: Optional[float] = None,
213214
) -> List[APIToolSearchResult]:
214215
"""Search api_tool_collection for the best matching API endpoints.
215216
216217
Uses a two-step approach:
217218
1. Dense search → get real cosine similarity scores
218219
2. Hybrid search (dense + sparse + RRF) → get best-ranked matches
219220
220-
Returns endpoints annotated with confidence level:
221+
The effective minimum cosine threshold is determined by ``min_cosine_override``
222+
when provided, otherwise ``API_TOOL_MIN_THRESHOLD`` is used. Confidence levels
223+
are always evaluated against the same effective minimum:
224+
221225
- "high": cosine >= API_TOOL_HIGH_CONFIDENCE_THRESHOLD AND score gap is large
222-
- "medium": cosine >= API_TOOL_MIN_THRESHOLD but ambiguous
223-
- "none": cosine < API_TOOL_MIN_THRESHOLD (no match)
226+
- "medium": cosine >= effective minimum threshold but ambiguous
227+
- "none": cosine < effective minimum threshold (no match)
224228
225229
Args:
226230
query: Natural language user query.
@@ -230,6 +234,11 @@ async def search(
230234
precomputed_embedding: Dense vector already computed upstream (e.g. by
231235
the service classifier). When provided the embedding step is skipped
232236
entirely, saving one embedding API call per request.
237+
min_cosine_override: Optional cosine similarity threshold that replaces
238+
``API_TOOL_MIN_THRESHOLD`` for this call. Pass a higher value to
239+
tighten matching (e.g. when checking for an intent switch mid-session) or a
240+
lower value to broaden it. When None, ``API_TOOL_MIN_THRESHOLD``
241+
applies unchanged.
233242
234243
Returns:
235244
List containing exactly one APIToolSearchResult (the resolved best match),
@@ -263,17 +272,23 @@ async def search(
263272
)
264273
cosine_gap = top_cosine - second_cosine
265274

275+
effective_min = (
276+
min_cosine_override
277+
if min_cosine_override is not None
278+
else API_TOOL_MIN_THRESHOLD
279+
)
280+
266281
logger.info(f"APISemanticSearcher: query={query!r}")
267282
logger.info(
268283
f"APISemanticSearcher: dense top={dense_results[0]['name']} "
269284
f"(cosine={top_cosine:.4f}), gap={cosine_gap:.4f}"
270285
)
271286

272287
# Below minimum threshold → no match
273-
if top_cosine < API_TOOL_MIN_THRESHOLD:
288+
if top_cosine < effective_min:
274289
logger.info(
275290
f"APISemanticSearcher: cosine {top_cosine:.4f} < "
276-
f"threshold {API_TOOL_MIN_THRESHOLD} — no API tool match"
291+
f"threshold {effective_min} — no API tool match"
277292
)
278293
return []
279294

@@ -319,11 +334,12 @@ async def search(
319334

320335
if (
321336
point_cosine >= API_TOOL_HIGH_CONFIDENCE_THRESHOLD
337+
and point_cosine >= effective_min
322338
and effective_gap >= API_TOOL_SCORE_GAP_THRESHOLD
323339
and i == 0
324340
):
325341
confidence = "high"
326-
elif point_cosine >= API_TOOL_MIN_THRESHOLD:
342+
elif point_cosine >= effective_min:
327343
confidence = "medium"
328344
else:
329345
continue # Skip results below threshold

src/tool_classifier/classifier.py

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,7 @@
3838
DENSE_MIN_THRESHOLD,
3939
DENSE_HIGH_CONFIDENCE_THRESHOLD,
4040
DENSE_SCORE_GAP_THRESHOLD,
41+
API_TOOL_INTENT_SWITCH_THRESHOLD,
4142
)
4243

4344
from tool_classifier.sparse_encoder import SparseVector, compute_sparse_vector
@@ -191,7 +192,9 @@ async def classify(
191192
# If so, abandon the old session and start fresh rather
192193
# than treating the new query as a param-collection reply.
193194
new_api_match = await self._try_api_tool_classification(
194-
query, request
195+
query,
196+
request,
197+
min_cosine_override=API_TOOL_INTENT_SWITCH_THRESHOLD,
195198
)
196199
if (
197200
new_api_match is not None
@@ -760,6 +763,7 @@ async def _try_api_tool_classification(
760763
query: str,
761764
request: Optional[OrchestrationRequest] = None,
762765
precomputed_embedding: Optional[List[float]] = None,
766+
min_cosine_override: Optional[float] = None,
763767
) -> Optional[ClassificationResult]:
764768
"""Search api_tool_collection and return a ClassificationResult if a match is found.
765769
@@ -772,6 +776,12 @@ async def _try_api_tool_classification(
772776
precomputed_embedding: Dense embedding vector already computed for this
773777
query by the service search step. When provided, the ATC searcher
774778
reuses it instead of making a second embedding API call.
779+
min_cosine_override: Optional cosine similarity threshold that replaces
780+
the default ATC minimum score configured in the searcher. When
781+
provided, only endpoints whose cosine score meets or exceeds this
782+
value are considered a match. Use a higher value to tighten matching
783+
(e.g. when checking for an intent switch mid-session) or a lower value to
784+
broaden it. When None, the searcher's default threshold applies.
775785
776786
Returns:
777787
ClassificationResult with API_TOOL_CALLING workflow if a match is found,
@@ -790,6 +800,7 @@ async def _try_api_tool_classification(
790800
environment=environment,
791801
connection_id=connection_id,
792802
precomputed_embedding=precomputed_embedding,
803+
min_cosine_override=min_cosine_override,
793804
)
794805
if results:
795806
matched = results[0]

src/tool_classifier/constants.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -130,6 +130,11 @@
130130
API_TOOL_SCORE_GAP_THRESHOLD = 0.05
131131
"""Cosine score gap (top - second) for high-confidence API tool classification."""
132132

133+
API_TOOL_INTENT_SWITCH_THRESHOLD = 0.50
134+
"""Minimum cosine required to abandon an active session and switch intent.
135+
Higher than API_TOOL_MIN_THRESHOLD (0.40).
136+
Only a clear, unambiguous new query (cosine >= 0.50) should override a session."""
137+
133138

134139
# ============================================================================
135140
# Agentic Loop — Continuation Threshold

tests/api_tool_eval/test-endpoints.json

Lines changed: 18 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -38,11 +38,13 @@
3838
{
3939
"endpointId": "3b8d4e33-7f6a-4c2d-a911-2c4b8f330004",
4040
"name": "get_vehicle_tax_info",
41-
"description": "Arvuta sõidukimaks registreerimisnumbri alusel.",
42-
"url": "https://avalik.emta.ee/msm-public/v1/vehicle-tax",
43-
"method": "POST",
41+
"description": "Arvuta sõidukimaks ja registreerimistasu Eesti sõiduki registreerimisnumbri alusel. Tagastab aastase sõidukimaksu ja ühekordse registreerimistasu koos CO2 ja massikomponentidega.",
42+
"url": "https://avalik.emta.ee/msm-public/v1/vehicle-tax/calculate-by-reg-nr",
43+
"method": "GET",
4444
"params": [
45-
{ "name": "registrationNumber", "type": "string", "required": true, "description": "Sõiduki registreerimisnumber" }
45+
{ "name": "regNr", "type": "string", "required": true, "description": "Sõiduki registreerimisnumber (nt 123ABC, 456DEF)" },
46+
{ "name": "calculationYear", "type": "integer", "required": true, "description": "Aasta, mille kohta maksu arvutatakse (nt 2026, 2027). Kohustuslik parameeter — kasutaja peab aasta täpsustama." },
47+
{ "name": "showRegFee", "type": "boolean", "required": false, "description": "Kui true, lisatakse vastusesse ka ühekordne registreerimistasu (regFee). Vaikimisi false. Kasuta true, kui kasutaja küsib registreerimistasu kohta." }
4648
]
4749
},
4850
{
@@ -154,5 +156,17 @@
154156
"url": "https://ilmmicroservice.envir.ee/api/forecasts",
155157
"method": "GET",
156158
"params": []
159+
},
160+
{
161+
"endpointId": "9b4d0e99-7a8b-4c9d-b033-9d1c5f990016",
162+
"name": "check_legal_eligibility",
163+
"description": "Kontrolli isiku õiguslikku sobivust teatud toimingute tegemiseks vanuse ja kodakondsuse alusel.",
164+
"url": "https://bcd831cf-cd1b-40b9-af86-b760cd6143e8.mock.pstmn.io/api/legal-check",
165+
"method": "POST",
166+
"params": [
167+
{ "name": "action", "type": "string", "required": true, "description": "Toiming, mille jaoks sobivust kontrollitakse (nt start_business, vote, drive)" },
168+
{ "name": "citizenship", "type": "string", "required": true, "description": "Kahetäheline ISO kodakondsuskood (nt EE, LV, FI)" },
169+
{ "name": "age", "type": "integer", "required": true, "description": "Isiku vanus täisaastates" }
170+
]
157171
}
158172
]

0 commit comments

Comments
 (0)