@@ -29,7 +29,8 @@ def load(test_data: dict) -> List[BaseCheck]:
2929 checks .append (ContainsCheck (test_name ))
3030 if test_data .get (DoesNotContainCheck .KEY ):
3131 checks .append (DoesNotContainCheck (test_name ))
32-
32+ if test_data .get (CodeGateEnrichment .KEY ) is not None :
33+ checks .append (CodeGateEnrichment (test_name ))
3334 return checks
3435
3536
@@ -51,11 +52,11 @@ async def run_check(self, parsed_response: str, test_data: dict) -> bool:
5152 similarity = await self ._calculate_string_similarity (
5253 parsed_response , test_data [DistanceCheck .KEY ]
5354 )
55+ logger .info (f"Distance check: - { self .test_name } " )
56+ logger .debug (f"Similarity: { similarity } " )
57+ logger .debug (f"Response: { parsed_response } " )
58+ logger .debug (f"Expected Response: { test_data [DistanceCheck .KEY ]} " )
5459 if similarity < 0.8 :
55- logger .error (f"Test { self .test_name } failed" )
56- logger .error (f"Similarity: { similarity } " )
57- logger .error (f"Response: { parsed_response } " )
58- logger .error (f"Expected Response: { test_data [DistanceCheck .KEY ]} " )
5960 return False
6061 return True
6162
@@ -64,10 +65,10 @@ class ContainsCheck(BaseCheck):
6465 KEY = "contains"
6566
async def run_check(self, parsed_response: str, test_data: dict) -> bool:
    """Report whether the expected snippet occurs in the response.

    Args:
        parsed_response: Response text to search.
        test_data: Test definition; the snippet is read from
            ``test_data[ContainsCheck.KEY]``.

    Returns:
        True when the snippet, with surrounding whitespace stripped, is a
        substring of ``parsed_response``; False otherwise.
    """
    logger.info(f"Contains check: {self.test_name}")
    logger.debug(f"Response: {parsed_response}")
    # Looked up only after the first two log calls so a missing key
    # surfaces at the same point as before.
    expected = test_data[ContainsCheck.KEY]
    logger.debug(f"Expected Response to contain: {expected}")
    # The log shows the raw expectation; the comparison uses the stripped one.
    return expected.strip() in parsed_response
7374
@@ -76,11 +77,35 @@ class DoesNotContainCheck(BaseCheck):
7677 KEY = "does_not_contain"
7778
async def run_check(self, parsed_response: str, test_data: dict) -> bool:
    """Report whether the forbidden snippet is absent from the response.

    Args:
        parsed_response: Response text to search.
        test_data: Test definition; the snippet is read from
            ``test_data[DoesNotContainCheck.KEY]``.

    Returns:
        True when the snippet, with surrounding whitespace stripped, is not
        a substring of ``parsed_response``; False otherwise.
    """
    logger.info(f"Does not contain check: {self.test_name}")
    logger.debug(f"Response: {parsed_response}")
    # Looked up only after the first two log calls so a missing key
    # surfaces at the same point as before.
    forbidden = test_data[DoesNotContainCheck.KEY]
    logger.debug(f"Expected Response to not contain: '{forbidden}'")
    # The log shows the raw value; the comparison uses the stripped one.
    return forbidden.strip() not in parsed_response
86+
87+
class CodeGateEnrichment(BaseCheck):
    """Check whether the CodeGate response differs from the raw model response.

    The response obtained through CodeGate is compared with the response
    stored under ``test_data["direct_response"]`` by delegating to
    ``DistanceCheck``. The test's ``codegate_enrichment`` entry may carry an
    ``expect_difference`` flag that says which outcome counts as a pass.
    """

    KEY = "codegate_enrichment"

    async def run_check(self, parsed_response: str, test_data: dict) -> bool:
        """Compare the CodeGate response against the raw model response.

        Args:
            parsed_response: Response produced through CodeGate.
            test_data: Test definition. Must contain ``"direct_response"``;
                may contain ``CodeGateEnrichment.KEY`` mapped to a dict with
                an optional ``"expect_difference"`` entry.

        Returns:
            When ``expect_difference`` is truthy, True if ``DistanceCheck``
            judged the two responses dissimilar. Otherwise True if it judged
            them similar.

        Raises:
            KeyError: If ``test_data`` has no ``"direct_response"`` entry.
        """
        direct_response = test_data["direct_response"]
        logger.info(f"CodeGate enrichment check: - {self.test_name}")
        logger.debug(f"Response (CodeGate): {parsed_response}")
        logger.debug(f"Response (Raw model): {direct_response}")

        # Reuse DistanceCheck's similarity verdict by passing the raw response
        # as its expected value.
        distance_check = DistanceCheck(self.test_name)
        are_similar = await distance_check.run_check(
            parsed_response, {DistanceCheck.KEY: direct_response}
        )

        # The entry may be absent, null, or a bare scalar (e.g. ``true``).
        # Only a dict can carry the flag, so anything else falls back to the
        # default of "no difference expected" instead of raising
        # AttributeError on ``.get``.
        settings = test_data.get(CodeGateEnrichment.KEY)
        if isinstance(settings, dict):
            expect_difference = bool(settings.get("expect_difference", False))
        else:
            expect_difference = False

        if expect_difference:
            # Enrichment should make the responses diverge.
            logger.info("CodeGate enrichment check: Expecting difference")
            return not are_similar
        # Without enrichment the two responses should stay similar.
        logger.info("CodeGate enrichment check: Not expecting difference")
        return are_similar
0 commit comments