Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
91 commits
Select commit Hold shift + click to select a range
3020e31
Merge pull request #122 from rootcodelabs/wip
nuwangeek Feb 20, 2026
6e5c22c
remove unwanted file
nuwangeek Feb 20, 2026
38d0533
updated changes
nuwangeek Feb 20, 2026
72b8ae1
fixed requested changes
nuwangeek Feb 20, 2026
9b7bc7b
fixed issue
nuwangeek Feb 20, 2026
46dd6c4
Merge pull request #123 from rootcodelabs/llm-316
nuwangeek Feb 21, 2026
068f4e0
Merge pull request #124 from buerokratt/wip
Thirunayan22 Feb 21, 2026
a2084e5
service workflow implementation without calling service endpoints
nuwangeek Feb 24, 2026
5216c09
Merge pull request #126 from rootcodelabs/wip
nuwangeek Feb 24, 2026
864ad30
fixed requested changes
nuwangeek Feb 24, 2026
25f9614
fixed issues
nuwangeek Feb 24, 2026
69c1279
protocol related requested changes
nuwangeek Feb 24, 2026
07f2e0f
fixed requested changes
nuwangeek Feb 24, 2026
f63f777
update time tracking
nuwangeek Feb 25, 2026
5429bc0
added time tracking and reloacate input guardrail before toolclassifiier
nuwangeek Feb 25, 2026
721263a
fixed issue
nuwangeek Feb 25, 2026
6ed02d1
Merge pull request #127 from buerokratt/wip
nuwangeek Feb 25, 2026
7238baa
Merge branch 'optimization/llm-304' into wip
nuwangeek Feb 25, 2026
ae7cfa0
Merge pull request #128 from rootcodelabs/wip
nuwangeek Feb 25, 2026
f8a82b6
fixed issue
nuwangeek Feb 25, 2026
3b89fba
added hybrid search for the service detection
nuwangeek Feb 26, 2026
789f062
update tool classifier
nuwangeek Mar 1, 2026
609e6d5
fixing merge conflicts
nuwangeek Mar 1, 2026
a30c52d
Merge pull request #129 from buerokratt/wip
nuwangeek Mar 1, 2026
8dfc155
Merge pull request #130 from rootcodelabs/wip
nuwangeek Mar 1, 2026
3d7fb85
updated intent data enrichment and service classification flow perfor…
nuwangeek Mar 2, 2026
bee9fbf
fixed issue
nuwangeek Mar 2, 2026
4888045
Merge pull request #131 from rootcodelabs/optimization/data-enrichment
nuwangeek Mar 3, 2026
0a0806f
optimize first user query response generation time
nuwangeek Mar 3, 2026
1eb8b47
fixed pr reviewed issues
nuwangeek Mar 3, 2026
94b4f39
Merge pull request #132 from buerokratt/wip
nuwangeek Mar 3, 2026
82b3fe5
Merge branch 'optimization/vector-indexer' into wip
nuwangeek Mar 3, 2026
1b4ada9
Merge pull request #134 from buerokratt/wip
nuwangeek Mar 3, 2026
bb1601f
service integration
nuwangeek Mar 8, 2026
9ce1da2
context based response generation flow
nuwangeek Mar 9, 2026
d647f86
fixed pr review suggested issues
nuwangeek Mar 9, 2026
d67214e
Merge pull request #135 from rootcodelabs/llm-309
nuwangeek Mar 9, 2026
b90ab52
Merge pull request #136 from rootcodelabs/llm-310
nuwangeek Mar 9, 2026
6c46d3c
removed service project layer
nuwangeek Mar 10, 2026
d3e1494
fixed issues
nuwangeek Mar 12, 2026
4add446
Merge pull request #137 from rootcodelabs/llm-310
nuwangeek Mar 12, 2026
c2ef115
delete unnessary files
nuwangeek Mar 13, 2026
97f6f1a
added requested changes
nuwangeek Mar 13, 2026
0be284e
Merge pull request #138 from buerokratt/wip
nuwangeek Mar 17, 2026
a32ca6d
Merge branch 'llm/service-integration' into wip
nuwangeek Mar 17, 2026
4276e7d
Merge pull request #140 from buerokratt/wip
nuwangeek Mar 18, 2026
24259a9
Merge pull request #141 from buerokratt/wip
nuwangeek Mar 20, 2026
1a54c5d
Merge pull request #143 from buerokratt/wip
nuwangeek Mar 23, 2026
95dc35a
Merge pull request #145 from buerokratt/wip
nuwangeek Mar 26, 2026
e047f41
Merge pull request #150 from buerokratt/wip
nuwangeek Apr 7, 2026
0486fa4
fix issue in prompt config toggle
nuwangeek Apr 7, 2026
9e9bd77
Merge pull request #151 from buerokratt/wip
nuwangeek Apr 8, 2026
2bf97f8
Merge branch 'llm-382' into wip
nuwangeek Apr 8, 2026
2f53999
Merge pull request #153 from buerokratt/wip
nuwangeek Apr 8, 2026
876889d
Intergartion of CKB import API for agency data sync
ruwinirathnamalala Apr 10, 2026
b710853
Intergartion of CKB import API for agency data sync
ruwinirathnamalala Apr 10, 2026
e31a0af
Merge pull request #154 from buerokratt/wip
nuwangeek Apr 16, 2026
f6a4300
Merge pull request #156 from buerokratt/wip
nuwangeek Apr 16, 2026
2806221
Merge branch 'wip' of https://github.com/rootcodelabs/LLM-Module into…
ruwinirathnamalala Apr 17, 2026
1fc3b9c
standalone agentic loop module
nuwangeek Apr 17, 2026
622c969
fixed requested changes
nuwangeek Apr 17, 2026
cf9723e
fixed ruff format issues
nuwangeek Apr 17, 2026
d159731
Merge pull request #157 from rootcodelabs/llm-394
nuwangeek Apr 22, 2026
83c7500
complete API semantic searcher with ambiguous result handling and too…
nuwangeek Apr 22, 2026
21c3c27
Merge pull request #158 from rootcodelabs/llm-394
nuwangeek Apr 22, 2026
591b119
Merge pull request #159 from rootcodelabs/llm-345-dev
nuwangeek Apr 22, 2026
c5582f8
complete semantic searcher evaluation and update to multi point index…
nuwangeek Apr 22, 2026
f569070
Merge pull request #160 from rootcodelabs/llm-403
nuwangeek Apr 22, 2026
80bfce7
competed integration of agentic loop with semantic searcher and strea…
nuwangeek Apr 22, 2026
8b984f1
Enhancements in data-sync flow and updated agency_id in agency_sync t…
ruwinirathnamalala Apr 23, 2026
d71a5eb
Merge pull request #161 from rootcodelabs/llm-408
nuwangeek Apr 24, 2026
6efe48b
Implemented the API caller module
nuwangeek Apr 24, 2026
51d8a0e
Completed integration of CKB and RAG changelogs with schema updates f…
nuwangeek Apr 28, 2026
2449472
Merge pull request #164 from buerokratt/wip
nuwangeek May 5, 2026
43e9ad3
Merge branch 'llm-345-dev' into wip
nuwangeek May 5, 2026
0ea073b
Merge pull request #167 from buerokratt/wip
nuwangeek May 6, 2026
c368cfd
Merge pull request #169 from buerokratt/wip
nuwangeek May 6, 2026
bdc878c
Merge pull request #171 from buerokratt/wip
nuwangeek May 6, 2026
a385166
Merge branch 'llm-348' into wip
nuwangeek May 6, 2026
59b604c
Merge pull request #173 from buerokratt/wip
nuwangeek May 6, 2026
49e9e77
Merge pull request #175 from buerokratt/wip
nuwangeek May 6, 2026
8e7ab98
Merge branch 'ckb_integration_for_data_sync' into wip
nuwangeek May 6, 2026
5b3ee08
Merge pull request #177 from buerokratt/wip
nuwangeek May 7, 2026
135ddf0
Merge branch 'llm-412' into wip
nuwangeek May 7, 2026
3671b6c
Merge pull request #179 from buerokratt/wip
nuwangeek May 7, 2026
e8d2d8d
Merge pull request #180 from buerokratt/wip
nuwangeek May 8, 2026
e1cab89
updated api tool calling to handle custom prompt configs
nuwangeek May 8, 2026
365ab39
Merge pull request #183 from buerokratt/wip
nuwangeek May 12, 2026
30bd893
Merge branch 'llm-430' into wip
nuwangeek May 12, 2026
ec316e8
Merge pull request #184 from rootcodelabs/wip
nuwangeek May 12, 2026
60cc861
Fixed copilot suggested changes
nuwangeek May 13, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 7 additions & 3 deletions src/tool_classifier/agentic_loop.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
"""Standalone agentic loop for multi-turn parameter collection."""

import asyncio
from typing import Any, Dict, List
from typing import Any, Dict, List, Optional

from loguru import logger

Expand Down Expand Up @@ -106,6 +106,7 @@ async def run_turn(
awaiting_continuation: bool = False,
continuation_turn: int = CONTINUATION_TURN,
session_language: str = "en",
continuation_language: Optional[str] = None,
) -> AgenticLoopResult:
"""Process one user turn of the parameter-collection loop.
Expand Down Expand Up @@ -279,8 +280,9 @@ async def run_turn(
turn_count,
chat_id,
)
effective_continuation_lang = continuation_language or session_language
continuation_q = _CONTINUATION_QUESTIONS.get(
session_language, CONTINUATION_QUESTION
effective_continuation_lang, CONTINUATION_QUESTION
)
await self._save_session(
chat_id, merged_params, updated_turn_count, awaiting_continuation=True
Expand Down Expand Up @@ -314,6 +316,7 @@ async def stream_run_turn(
awaiting_continuation: bool = False,
continuation_turn: int = CONTINUATION_TURN,
session_language: str = "en",
continuation_language: Optional[str] = None,
) -> tuple[AgenticLoopResult, List[str]]:
"""Process one user turn like :meth:`run_turn` but stream clarifying_question tokens.
Expand Down Expand Up @@ -454,8 +457,9 @@ async def stream_run_turn(
turn_count,
chat_id,
)
effective_continuation_lang = continuation_language or session_language
continuation_q = _CONTINUATION_QUESTIONS.get(
session_language, CONTINUATION_QUESTION
effective_continuation_lang, CONTINUATION_QUESTION
)
await self._save_session(
chat_id, merged_params, updated_turn_count, awaiting_continuation=True
Expand Down
31 changes: 29 additions & 2 deletions src/tool_classifier/api_response_formatter.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,9 @@ class APIResponseFormatterSignature(dspy.Signature):
- IGNORE the language of user_query for output language decisions — short follow-up
messages are unreliable indicators. Always use response_language.

If custom_instructions is non-empty, follow those rules with HIGHEST PRIORITY —
they override defaults (e.g. language policy, tone, formatting style).

Rules:
- Format data in a readable way using bullet points, numbered lists, or natural prose.
Do NOT return raw JSON or wrap content in code blocks.
Expand Down Expand Up @@ -69,6 +72,14 @@ class APIResponseFormatterSignature(dspy.Signature):
"Always use this — do not infer language from api_response content."
)
)
custom_instructions: str = dspy.InputField(
desc=(
"Optional system-level instructions configured by the organisation "
"(e.g. 'Always respond in Estonian', 'Use structured format'). "
"Empty string when no custom config is active. "
"When non-empty, follow these rules with highest priority."
)
)

formatted_answer: str = dspy.OutputField(
desc=(
Expand All @@ -95,10 +106,17 @@ class APIResponseFormatterSignature(dspy.Signature):
class APIResponseFormatterModule(dspy.Module):
"""DSPy Module that converts raw API JSON responses into natural-language answers."""

def __init__(self) -> None:
"""Initialize formatter with a direct DSPy Predict."""
def __init__(self, custom_instructions: str = "") -> None:
"""Initialize formatter with a direct DSPy Predict.

Args:
custom_instructions: Optional organisation-level prompt rules (e.g. language
policy). Passed verbatim to the DSPy predictor on every call. Defaults
to empty string (no custom config).
"""
super().__init__()
self.formatter = dspy.Predict(APIResponseFormatterSignature)
self._custom_instructions = custom_instructions

def forward(
self,
Expand Down Expand Up @@ -131,6 +149,7 @@ def forward(
api_response=normalized,
endpoint_description=endpoint_description,
response_language=response_language,
custom_instructions=self._custom_instructions,
)
return result.formatted_answer # type: ignore[no-any-return]

Expand Down Expand Up @@ -195,6 +214,7 @@ async def stream_forward(
if detected_language in _FORMATTER_ERROR_MESSAGES
else "en"
)
output_stream = None
try:
normalized = self._normalize_response(api_response)
normalized = self._annotate_empty(normalized)
Expand All @@ -207,6 +227,7 @@ async def stream_forward(
api_response=normalized,
endpoint_description=endpoint_description,
response_language=response_language,
custom_instructions=self._custom_instructions,
)

stream_started = False
Expand Down Expand Up @@ -255,6 +276,12 @@ async def stream_forward(
f"APIResponseFormatterModule.stream_forward failed: {e}", exc_info=True
)
yield get_localized_message(_FORMATTER_ERROR_MESSAGES, safe_language)
finally:
if output_stream is not None:
try:
await output_stream.aclose()
except Exception as cleanup_error:
logger.debug(f"Error during stream cleanup: {cleanup_error}")

# ------------------------------------------------------------------

Expand Down
99 changes: 94 additions & 5 deletions src/tool_classifier/param_extractor.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

import asyncio
import json
import re
from datetime import datetime, timezone
from typing import Any, Dict, List, Optional, TypedDict

Expand All @@ -15,6 +16,36 @@

_MAX_HISTORY_TURNS = 5

# Regex patterns to strip format hints from parameter descriptions before
# they are fed to the question-generation prompt. This prevents the LLM
# from including format instructions (e.g. "YYYY-MM-DD") in its questions.
_FORMAT_HINT_PATTERNS: List[re.Pattern[str]] = [
# Parenthesised format hints: (YYYY-MM-DD), (ISO 8601), (2-letter code), (HH:MM:SS)
re.compile(
r"\s*\([^)]*(?:YYYY|MM|DD|HH|SS|ISO\s*\d*|letter|format)[^)]*\)",
re.IGNORECASE,
),
# Trailing phrases: "in format YYYY-MM-DD" or "in the format YYYY-MM-DD"
re.compile(r"\s*,?\s*in\s+(?:the\s+)?format\s+\S+", re.IGNORECASE),
]


def _strip_format_hints(description: str) -> str:
"""Remove format hints from a parameter description.

Strips patterns such as ``(YYYY-MM-DD)``, ``(ISO 8601)``,
``(2-letter code)``, ``(HH:MM:SS)``, and trailing
``in the format YYYY-MM-DD`` phrases before the schema is passed to the
LLM for both extraction and question generation. This prevents format
instructions from leaking into clarifying questions (e.g. "What date?
(YYYY-MM-DD)"). Type coercion is handled independently by
:meth:`~ParamExtractionModule._validate_param_type`, so the format hints
are not needed by the extractor.
"""
for pattern in _FORMAT_HINT_PATTERNS:
description = pattern.sub("", description)
return description.strip()


class ParamExtractionResult(TypedDict):
"""Return contract for ParamExtractionModule.forward()."""
Expand All @@ -34,6 +65,9 @@ class ParamExtractionSignature(dspy.Signature):
short follow-up messages ("I'm not sure", "2026-01-01") are unreliable indicators.
Always use session_language.

If custom_instructions is non-empty, follow those rules with HIGHEST PRIORITY —
they override defaults (e.g. language policy, tone) for the clarifying_question output.

Extraction rules:
- Extract values for ALL parameters listed in params_schema that appear in user_message
or conversation_history, regardless of whether they are already in already_collected
Expand All @@ -42,6 +76,12 @@ class ParamExtractionSignature(dspy.Signature):
- Only skip extraction for a param if the user has NOT mentioned it at all in this turn
- Validate types: dates must be ISO 8601 (YYYY-MM-DD), integers must be whole numbers,
numbers must be numeric, booleans must be true or false
- SINGLE-VALUE ASSIGNMENT RULE: When the user's message contains exactly ONE value of a
given type (e.g. one date) and MULTIPLE required parameters of the same type are still
missing (e.g. both startDate and endDate are missing), assign that single value to the
FIRST such missing required parameter in the order they appear in params_schema — never
to a later one. For example, if startDate appears before endDate in params_schema and
both are missing, a lone date like "2026-04-01" must be assigned to startDate, not endDate.

missing_required rules:
- List every required parameter (required=true in schema) whose value is absent
Expand All @@ -59,6 +99,11 @@ class ParamExtractionSignature(dspy.Signature):
- Use each missing parameter's description field to phrase the question naturally
(e.g., "Which country and date would you like to use?" not "Provide countryIsoCode and startDate")
- Never expose raw parameter names (camelCase identifiers) to the user
- NEVER include format requirements, expected formats, format examples, or
structural hints (such as "YYYY-MM-DD", "ISO 8601", "2-letter code",
"in the format...") in the question — only ask WHAT information is needed,
not HOW it should be formatted. The system handles format conversion
internally from any natural-language input the user provides.
"""

user_message: str = dspy.InputField(
Expand All @@ -85,6 +130,14 @@ class ParamExtractionSignature(dspy.Signature):
"still extract the new value — corrections are allowed."
)
)
custom_instructions: str = dspy.InputField(
desc=(
"Optional system-level instructions configured by the organisation "
"(e.g. 'Always respond in Estonian', 'Use formal tone'). "
"Empty string when no custom config is active. "
"When non-empty, follow these rules with highest priority for the clarifying_question."
)
)

extracted_params: str = dspy.OutputField(
desc='Valid JSON object of newly extracted parameters only: {"param_name": value}. Empty object {} if nothing new found.'
Expand All @@ -93,17 +146,29 @@ class ParamExtractionSignature(dspy.Signature):
desc='Valid JSON array of required parameter names still missing after extraction: ["param1", "param2"]. Empty array [] if all required params are satisfied.'
)
clarifying_question: str = dspy.OutputField(
desc='A single natural-language question that asks for ALL missing parameters at once, or the literal string "none" if all required params are collected.'
desc=(
"A single natural-language question that asks for ALL missing parameters "
'at once, or the literal string "none" if all required params are collected. '
'Never include format instructions or examples (e.g. "YYYY-MM-DD", '
'"ISO 8601", "2-letter code") — only ask what information is needed.'
)
)


class ParamExtractionModule(dspy.Module):
"""DSPy Module for API parameter extraction from natural language."""

def __init__(self) -> None:
"""Initialize param extraction module with Predict (direct prediction)."""
def __init__(self, custom_instructions: str = "") -> None:
"""Initialize param extraction module with Predict (direct prediction).

Args:
custom_instructions: Optional organisation-level prompt rules (e.g. language
policy). Passed verbatim to the DSPy predictor on every call. Defaults
to empty string (no custom config).
"""
super().__init__()
self.extractor = dspy.Predict(ParamExtractionSignature)
self._custom_instructions = custom_instructions

def forward(
self,
Expand All @@ -130,7 +195,13 @@ def forward(
already_collected = already_collected or {}

history_text = self._format_conversation_history(conversation_history)
params_schema_json = json.dumps(params_schema, ensure_ascii=False)
sanitized_schema = [
{**p, "description": _strip_format_hints(p.get("description", ""))}
if isinstance(p, dict)
else p
for p in params_schema
]
params_schema_json = json.dumps(sanitized_schema, ensure_ascii=False)
already_collected_json = json.dumps(already_collected, ensure_ascii=False)

result = None
Expand All @@ -141,6 +212,7 @@ def forward(
session_language=session_language,
params_schema=params_schema_json,
already_collected=already_collected_json,
custom_instructions=self._custom_instructions,
)
return self._parse_prediction(result, params_schema, already_collected)

Expand Down Expand Up @@ -206,9 +278,16 @@ async def stream_forward(
already_collected = already_collected or {}

history_text = self._format_conversation_history(conversation_history)
params_schema_json = json.dumps(params_schema, ensure_ascii=False)
sanitized_schema = [
{**p, "description": _strip_format_hints(p.get("description", ""))}
if isinstance(p, dict)
else p
for p in params_schema
]
params_schema_json = json.dumps(sanitized_schema, ensure_ascii=False)
already_collected_json = json.dumps(already_collected, ensure_ascii=False)

output_stream = None
try:
stream_predictor = self._get_stream_predictor()
output_stream = stream_predictor(
Expand All @@ -217,6 +296,7 @@ async def stream_forward(
session_language=session_language,
params_schema=params_schema_json,
already_collected=already_collected_json,
custom_instructions=self._custom_instructions,
)

tokens: List[str] = []
Expand Down Expand Up @@ -273,6 +353,15 @@ async def stream_forward(
logger.exception(f"ParamExtractionModule.stream_forward failed: {e}")
return [], self._safe_defaults(params_schema, already_collected)

finally:
if output_stream is not None:
try:
await output_stream.aclose()
except Exception as cleanup_error:
logger.debug(
f"Error during param extraction stream cleanup: {cleanup_error}"
)

# ------------------------------------------------------------------
# Private helpers
# ------------------------------------------------------------------
Expand Down
Loading
Loading