Merge pull request #144 from rootcodelabs/wip

nuwangeek · web-flow · commit 7b09747bad41 · 2026-03-25T11:06:42.000+05:30
Get update from rootcodelabs/LLM-Module wip into rootcodelabs/LLM-Module llm-368
diff --git a/GUI/src/pages/TestModel/index.tsx b/GUI/src/pages/TestModel/index.tsx
@@ -1,7 +1,7 @@
 import { useMutation, useQuery } from '@tanstack/react-query';
 import { Button, FormSelect, FormTextarea, Collapsible } from 'components';
 import CircularSpinner from 'components/molecules/CircularSpinner/CircularSpinner';
-import { FC, useState } from 'react';
+import { ComponentPropsWithoutRef, FC, useState } from 'react';
 import { useTranslation } from 'react-i18next';
 import ReactMarkdown from 'react-markdown';
 import remarkGfm from 'remark-gfm';
@@ -87,6 +87,17 @@ const TestLLM: FC = () => {
     }));
   };
 
+  const markdownComponents = { // element overrides handed to <ReactMarkdown components={...}>
+    ol: ({ children, start }: ComponentPropsWithoutRef<"ol">) => ( // forward `start` so lists not beginning at 1 keep their numbering
+      <ol start={start} style={{ paddingLeft: '1.5rem', listStyleType: 'decimal' }}>
+        {children}
+      </ol>
+    ),
+    a: (props: ComponentPropsWithoutRef<"a">) => ( // target/rel come after the spread so they always win over incoming props
+      <a {...props} target="_blank" rel="noopener noreferrer" />
+    ),
+  };
+
   return (
     <div>
       {isLoadingConnections ? (
@@ -141,7 +152,7 @@ const TestLLM: FC = () => {
               <div className="result-item">
                 <strong>Response:</strong>
                 <div className="response-content">
-                  <ReactMarkdown remarkPlugins={[remarkGfm]}>
+                  <ReactMarkdown remarkPlugins={[remarkGfm]} components={markdownComponents}>
                     {inferenceResult.content}
                   </ReactMarkdown>
                 </div>
@@ -159,7 +170,7 @@ const TestLLM: FC = () => {
                               <strong>Rank {contextItem.rank}</strong>
                             </div>
                             <div className="context-content">
-                              <ReactMarkdown remarkPlugins={[remarkGfm]}>
+                              <ReactMarkdown remarkPlugins={[remarkGfm]} components={markdownComponents}>
                                 {contextItem.chunkRetrieved}
                               </ReactMarkdown>
                             </div>
diff --git a/docker-compose-ec2.yml b/docker-compose-ec2.yml
@@ -128,7 +128,8 @@ services:
       - REACT_APP_RUUTER_API_URL=https://est-rag-rtc.rootcode.software/ruuter-public
       - REACT_APP_RUUTER_PRIVATE_API_URL=https://est-rag-rtc.rootcode.software/ruuter-private  
       - REACT_APP_CUSTOMER_SERVICE_LOGIN=https://est-rag-rtc.rootcode.software/authentication-layer/et/dev-auth
-      - REACT_APP_CSP=upgrade-insecure-requests; default-src 'self'; font-src 'self' data:; img-src 'self' data:; script-src 'self' 'unsafe-eval' 'unsafe-inline'; style-src 'self' 'unsafe-inline'; object-src 'none'; connect-src 'self' http://localhost:8086 http://localhost:8088 http://localhost:3004 http://localhost:3005 ws://localhost https://vault-agent-gui:8202 https://est-rag-rtc.rootcode.software;
+      - REACT_APP_NOTIFICATION_NODE_URL=https://est-rag-rtc.rootcode.software/notifications-node
+      - REACT_APP_CSP=upgrade-insecure-requests; default-src 'self'; font-src 'self' data:; img-src 'self' data:; script-src 'self' 'unsafe-eval' 'unsafe-inline'; style-src 'self' 'unsafe-inline'; object-src 'none'; connect-src 'self' http://localhost:8086 http://localhost:8088 http://localhost:3004 http://localhost:3005 http://localhost:4040 https://vault-agent-gui:8202 ws://localhost https://est-rag-rtc.rootcode.software;
       - DEBUG_ENABLED=true
       - CHOKIDAR_USEPOLLING=true
       - PORT=3001
diff --git a/docker-compose.yml b/docker-compose.yml
@@ -127,7 +127,8 @@ services:
       - REACT_APP_RUUTER_API_URL=http://localhost:8086
       - REACT_APP_RUUTER_PRIVATE_API_URL=http://localhost:8088
       - REACT_APP_CUSTOMER_SERVICE_LOGIN=http://localhost:3004/et/dev-auth
-      - REACT_APP_CSP=upgrade-insecure-requests; default-src 'self'; font-src 'self' data:; img-src 'self' data:; script-src 'self' 'unsafe-eval' 'unsafe-inline'; style-src 'self' 'unsafe-inline'; object-src 'none'; connect-src 'self' http://localhost:8086 http://localhost:8088 http://localhost:3004 http://localhost:3005 https://vault-agent-gui:8202 ws://localhost https://est-rag-rtc.rootcode.software;
+      - REACT_APP_NOTIFICATION_NODE_URL=http://localhost:4040
+      - REACT_APP_CSP=upgrade-insecure-requests; default-src 'self'; font-src 'self' data:; img-src 'self' data:; script-src 'self' 'unsafe-eval' 'unsafe-inline'; style-src 'self' 'unsafe-inline'; object-src 'none'; connect-src 'self' http://localhost:8086 http://localhost:8088 http://localhost:3004 http://localhost:3005 http://localhost:4040 https://vault-agent-gui:8202 ws://localhost https://est-rag-rtc.rootcode.software;
       - DEBUG_ENABLED=true
       - CHOKIDAR_USEPOLLING=true
       - PORT=3001
diff --git a/pyproject.toml b/pyproject.toml
@@ -90,6 +90,24 @@ ignore = []
 fixable = ["ALL"]
 unfixable = []
 
+# Per-file ignores for special cases
+[tool.ruff.lint.per-file-ignores]
+"tests/**/*.py" = [
+    "ANN",    # Ignore all flake8-annotations rules (ANN001, ANN201, ANN401, etc.)
+    "T201",   # Allow print statements
+]
+
+"src/models/request_models.py" = ["N815"]  # camelCase fields required for API contract
+"src/optimization/optimized_module_loader.py" = ["N815"]  # Pydantic model fields
+"src/optimization/optimizers/generator_optimizer.py" = ["N815"]  # Pydantic model fields
+"src/response_generator/response_generate.py" = ["N815", "ANN401"]  # Pydantic model fields + DSPy streamify Any type
+
+# Library interface patterns - legitimate Any usage
+"src/contextual_retrieval/contextual_retrieval_api_client.py" = ["ANN401"]  # httpx **kwargs pass-through
+"src/guardrails/dspy_nemo_adapter.py" = ["ANN401"]  # LangChain LLM interface + DSPy dynamic types
+"src/llm_orchestrator_config/context_manager.py" = ["ANN401"]  # MockResponse with dynamic attributes
+"src/optimization/metrics/*.py" = ["ANN401"]  # DSPy optimizer trace parameter (internal type)
+"byk-stack-setup/script.py" = ["T201"]  # CLI script uses print
 
 [tool.ruff.format]
 # Like Black, use double quotes for strings.
@@ -123,4 +141,4 @@ exclude = [
 ]
 
 # --- Global strictness ---
-typeCheckingMode = "standard"      # Standard typechecking mode
+typeCheckingMode = "standard"      # Standard typechecking mode
diff --git a/src/contextual_retrieval/bm25_search.py b/src/contextual_retrieval/bm25_search.py
@@ -5,7 +5,7 @@
 when collection data changes.
 """
 
-from typing import List, Dict, Any, Optional, Set
+from typing import List, Dict, Any, Optional, Set, TYPE_CHECKING
 from loguru import logger
 from rank_bm25 import BM25Okapi
 import re
@@ -20,13 +20,16 @@
 )
 from contextual_retrieval.config import ConfigLoader, ContextualRetrievalConfig
 
+if TYPE_CHECKING:
+    from contextual_retrieval.contextual_retrieval_api_client import HTTPClientManager
+
 
 class SmartBM25Search:
     """In-memory BM25 search with smart refresh capabilities."""
 
     def __init__(
         self, qdrant_url: str, config: Optional["ContextualRetrievalConfig"] = None
-    ):
+    ) -> None:
         self.qdrant_url = qdrant_url
         self._config = config if config is not None else ConfigLoader.load_config()
         self._http_client_manager = None
@@ -40,7 +43,7 @@ def __init__(
         # Strong references to background tasks to prevent premature GC
         self._background_tasks: Set[asyncio.Task[None]] = set()
 
-    async def _get_http_client_manager(self):
+    async def _get_http_client_manager(self) -> "HTTPClientManager":
         """Get the HTTP client manager instance."""
         if self._http_client_manager is None:
             self._http_client_manager = await get_http_client_manager()
@@ -356,7 +359,7 @@ def _tokenize_text(self, text: str) -> List[str]:
         tokens = self.tokenizer_pattern.findall(text.lower())
         return tokens
 
-    async def close(self):
+    async def close(self) -> None:
         """Close HTTP client."""
         if self._http_client_manager:
             await self._http_client_manager.close()
diff --git a/src/contextual_retrieval/constants.py b/src/contextual_retrieval/constants.py
@@ -15,7 +15,7 @@ class HttpClientConstants:
     DEFAULT_FAILURE_THRESHOLD = 5
     DEFAULT_RECOVERY_TIMEOUT = 60.0
 
-    # Timeouts (seconds)
+    # Timeouts in seconds
     DEFAULT_READ_TIMEOUT = 30.0
     DEFAULT_CONNECT_TIMEOUT = 10.0
     DEFAULT_WRITE_TIMEOUT = 10.0
diff --git a/src/contextual_retrieval/contextual_retrieval_api_client.py b/src/contextual_retrieval/contextual_retrieval_api_client.py
@@ -24,7 +24,7 @@
 class ServiceResilienceManager:
     """Service resilience manager with circuit breaker functionality for HTTP requests."""
 
-    def __init__(self, config: Optional["ContextualRetrievalConfig"] = None):
+    def __init__(self, config: Optional["ContextualRetrievalConfig"] = None) -> None:
         # Load configuration if not provided
         if config is None:
             config = ConfigLoader.load_config()
@@ -81,7 +81,7 @@ class HTTPClientManager:
     _instance: Optional["HTTPClientManager"] = None
     _lock = asyncio.Lock()
 
-    def __init__(self, config: Optional["ContextualRetrievalConfig"] = None):
+    def __init__(self, config: Optional["ContextualRetrievalConfig"] = None) -> None:
         """Initialize HTTP client manager."""
         # Load configuration if not provided
         self._config = config if config is not None else ConfigLoader.load_config()
@@ -169,7 +169,7 @@ async def get_client(
                             SecureErrorHandler.sanitize_error_message(
                                 e, "HTTP client initialization"
                             )
-                        )
+                        ) from e
 
         return self._client
 
diff --git a/src/contextual_retrieval/contextual_retriever.py b/src/contextual_retrieval/contextual_retriever.py
@@ -43,7 +43,7 @@ def __init__(
         config_path: Optional[str] = None,
         llm_service: Optional["LLMOrchestrationService"] = None,
         shared_bm25: Optional[SmartBM25Search] = None,
-    ):
+    ) -> None:
         """
         Initialize contextual retriever.
 
@@ -120,7 +120,7 @@ async def initialize(self) -> bool:
             logger.error(f"Failed to initialize Contextual Retriever: {e}")
             return False
 
-    def _get_session_llm_service(self):
+    def _get_session_llm_service(self) -> "LLMOrchestrationService":
         """
         Get cached LLM service for current retrieval session.
         Uses injected service if available, creates new instance as fallback.
@@ -140,7 +140,7 @@ def _get_session_llm_service(self):
 
         return self._session_llm_service
 
-    def _clear_session_cache(self):
+    def _clear_session_cache(self) -> None:
         """Clear cached connections at end of retrieval session."""
         if self._session_llm_service is not None:
             logger.debug("Clearing session LLM service cache")
@@ -374,7 +374,9 @@ async def _execute_batch_query_searches(
                 self._search_single_query_with_embedding(
                     query, i, embedding, collections, limit
                 )
-                for i, (query, embedding) in enumerate(zip(queries, batch_embeddings))
+                for i, (query, embedding) in enumerate(
+                    zip(queries, batch_embeddings, strict=True)
+                )
             ]
 
             # Execute all searches in parallel
@@ -621,7 +623,7 @@ async def health_check(self) -> Dict[str, Any]:
 
         return health_status
 
-    async def close(self):
+    async def close(self) -> None:
         """Clean up resources."""
         try:
             await self.provider_detection.close()
diff --git a/src/contextual_retrieval/provider_detection.py b/src/contextual_retrieval/provider_detection.py
@@ -7,7 +7,7 @@
 - No hardcoded weights or preferences
 """
 
-from typing import List, Optional, Dict, Any
+from typing import List, Optional, Dict, Any, TYPE_CHECKING
 from loguru import logger
 from contextual_retrieval.contextual_retrieval_api_client import get_http_client_manager
 from contextual_retrieval.error_handler import SecureErrorHandler
@@ -18,18 +18,21 @@
 )
 from contextual_retrieval.config import ConfigLoader, ContextualRetrievalConfig
 
+if TYPE_CHECKING:
+    from contextual_retrieval.contextual_retrieval_api_client import HTTPClientManager
+
 
 class DynamicProviderDetection:
     """Dynamic collection selection without hardcoded preferences."""
 
     def __init__(
         self, qdrant_url: str, config: Optional["ContextualRetrievalConfig"] = None
-    ):
+    ) -> None:
         self.qdrant_url = qdrant_url
         self._config = config if config is not None else ConfigLoader.load_config()
         self._http_client_manager = None
 
-    async def _get_http_client_manager(self):
+    async def _get_http_client_manager(self) -> "HTTPClientManager":
         """Get the HTTP client manager instance."""
         if self._http_client_manager is None:
             self._http_client_manager = await get_http_client_manager()
@@ -212,7 +215,7 @@ async def get_collection_stats(self) -> Dict[str, Any]:
 
         return stats
 
-    async def close(self):
+    async def close(self) -> None:
         """Close HTTP client."""
         if self._http_client_manager:
             await self._http_client_manager.close()
diff --git a/src/contextual_retrieval/qdrant_search.py b/src/contextual_retrieval/qdrant_search.py
@@ -5,7 +5,7 @@
 existing contextual embeddings created by the vector indexer.
 """
 
-from typing import List, Dict, Any, Optional, Protocol
+from typing import List, Dict, Any, Optional, Protocol, TYPE_CHECKING
 from loguru import logger
 import asyncio
 from contextual_retrieval.contextual_retrieval_api_client import get_http_client_manager
@@ -17,6 +17,9 @@
 )
 from contextual_retrieval.config import ConfigLoader, ContextualRetrievalConfig
 
+if TYPE_CHECKING:
+    from contextual_retrieval.contextual_retrieval_api_client import HTTPClientManager
+
 
 class LLMServiceProtocol(Protocol):
     """Protocol defining the interface required from LLM service for embedding operations."""
@@ -47,12 +50,12 @@ class QdrantContextualSearch:
 
     def __init__(
         self, qdrant_url: str, config: Optional["ContextualRetrievalConfig"] = None
-    ):
+    ) -> None:
         self.qdrant_url = qdrant_url
         self._config = config if config is not None else ConfigLoader.load_config()
         self._http_client_manager = None
 
-    async def _get_http_client_manager(self):
+    async def _get_http_client_manager(self) -> "HTTPClientManager":
         """Get the HTTP client manager instance."""
         if self._http_client_manager is None:
             self._http_client_manager = await get_http_client_manager()
@@ -345,7 +348,7 @@ def get_embeddings_for_queries_batch(
             logger.error(f"Failed to get batch embeddings: {e}")
             return None
 
-    async def close(self):
+    async def close(self) -> None:
         """Close HTTP client."""
         if self._http_client_manager:
             await self._http_client_manager.close()
diff --git a/src/contextual_retrieval/rank_fusion.py b/src/contextual_retrieval/rank_fusion.py
@@ -14,7 +14,7 @@
 class DynamicRankFusion:
     """Dynamic score fusion without hardcoded collection weights."""
 
-    def __init__(self, config: Optional["ContextualRetrievalConfig"] = None):
+    def __init__(self, config: Optional["ContextualRetrievalConfig"] = None) -> None:
         """
         Initialize rank fusion with configuration.
 
@@ -184,7 +184,7 @@ def _reciprocal_rank_fusion(
 
         # Calculate final fused scores
         fused_results: List[Dict[str, Any]] = []
-        for chunk_id, data in chunk_scores.items():
+        for data in chunk_scores.values():
             chunk = data["chunk"].copy()
 
             # Calculate fused RRF score
diff --git a/src/llm_orchestration_service_api.py b/src/llm_orchestration_service_api.py
@@ -281,6 +281,12 @@ async def orchestrate_llm_request(
         # Process the request
         response = await orchestration_service.process_orchestration_request(request)
 
+        buttons_present = bool(response.buttons)
+        buttons_count = len(response.buttons) if response.buttons else 0
+        logger.info(
+            f"[orchestrate] buttons in response for chatId {request.chatId}: "
+            f"present={buttons_present}, count={buttons_count}"
+        )
         logger.info(f"Successfully processed request for chatId: {request.chatId}")
         return response
 
@@ -364,6 +370,10 @@ async def test_orchestrate_llm_request(
 
         # If response is already TestOrchestrationResponse (when environment is testing), return it directly
         if isinstance(response, TestOrchestrationResponse):
+            buttons_count = len(response.buttons) if response.buttons else 0
+            logger.info(
+                f"[test_orchestrate] buttons present in response: {buttons_count}"
+            )
             logger.info(
                 f"Successfully processed test request for environment: {request.environment}"
             )
@@ -375,9 +385,9 @@ async def test_orchestrate_llm_request(
             questionOutOfLLMScope=response.questionOutOfLLMScope,
             inputGuardFailed=response.inputGuardFailed,
             content=response.content,
+            buttons=response.buttons,
             chunks=None,  # OrchestrationResponse doesn't have chunks
         )
-
         logger.info(
             f"Successfully processed test request for environment: {request.environment}"
         )
diff --git a/src/models/request_models.py b/src/models/request_models.py
diff --git a/src/utils/input_sanitizer.py b/src/utils/input_sanitizer.py
diff --git a/tests/test_input_sanitizer.py b/tests/test_input_sanitizer.py