Skip to content

Commit ba53c52

Browse files
committed
fix: full eval logic with code runner
1 parent 138f54b commit ba53c52

11 files changed

Lines changed: 1468 additions & 1135 deletions

File tree

evaluation_function/analyzer/code_runner.py

Lines changed: 72 additions & 47 deletions
Original file line number | Diff line number | Diff line change
@@ -7,10 +7,10 @@
77
- Handles ReturnException at global scope
88
"""
99

10-
from typing import List
10+
from typing import List, Optional
1111
from ..schemas.input_schema import ExecutionTestCase
1212
from .interpreter import Interpreter, ReturnException
13-
from ..schemas.output_schema import CodeCorrectnessResult, TestCaseResult, ParseResult
13+
from ..schemas.output_schema import CodeCorrectnessResult, InterpreterResult, TestCaseResult, ParseResult
1414
from ..parser.parser import PseudocodeParser
1515

1616

@@ -25,9 +25,10 @@ class CodeRunner:
2525
4. Return aggregated results
2626
"""
2727

28-
def __init__(self, parser: PseudocodeParser, interpreter: Interpreter):
28+
def __init__(self, parser: Optional[PseudocodeParser], interpreter: Interpreter):
2929
self.parser = parser
3030
self.interpreter = interpreter
31+
assert self.interpreter is not None, "Interpreter instance is required for CodeRunner"
3132

3233
def run(
3334
self,
@@ -44,6 +45,7 @@ def run(
4445
Returns:
4546
CodeCorrectnessResult with parse status and execution results
4647
"""
48+
assert self.parser is not None, "parser instance is required for CodeRunner to run from source"
4749

4850
# -----------------------------------
4951
# 1. Parse
@@ -61,7 +63,10 @@ def run(
6163
is_correct=False,
6264
feedback="Parsing failed. Fix syntax errors before execution."
6365
)
66+
return self.run_with_parse_result(parse_result, test_cases)
6467

68+
69+
def run_with_parse_result(self, parse_result: ParseResult, test_cases: List[ExecutionTestCase]) -> CodeCorrectnessResult:
6570
# -----------------------------------
6671
# 2. Execute Test Cases
6772
# -----------------------------------
@@ -80,28 +85,28 @@ def run(
8085
error_messages = []
8186

8287
if test_case.expected_variables is not None:
83-
if result["variables"] != test_case.expected_variables:
88+
if result.variables != test_case.expected_variables:
8489
passed = False
8590
error_messages.append(
8691
f"Variables mismatch: expected {test_case.expected_variables}, "
87-
f"got {result['variables']}"
92+
f"got {result.variables}"
8893
)
8994

9095
if test_case.expected_output is not None:
91-
if result["output"] != test_case.expected_output:
96+
if result.output != test_case.expected_output:
9297
passed = False
9398
error_messages.append(
9499
f"Output mismatch: expected {test_case.expected_output}, "
95-
f"got {result['output']}"
100+
f"got {result.output}"
96101
)
97102

98103
execution_results.append(
99104
TestCaseResult(
100105
input_data=test_case.initial_variables,
101-
expected_output={
102-
"variables": test_case.expected_variables,
103-
"output": test_case.expected_output
104-
},
106+
expected_output=InterpreterResult(
107+
variables=test_case.expected_variables,
108+
output=test_case.expected_output
109+
),
105110
actual_output=result,
106111
passed=passed,
107112
error_message="; ".join(error_messages) if error_messages else None
@@ -114,11 +119,14 @@ def run(
114119
execution_results.append(
115120
TestCaseResult(
116121
input_data=test_case.initial_variables,
117-
expected_output={
118-
"variables": test_case.expected_variables,
119-
"output": test_case.expected_output
120-
},
121-
actual_output=None,
122+
expected_output=InterpreterResult(
123+
variables=test_case.expected_variables,
124+
output=test_case.expected_output
125+
),
126+
actual_output=InterpreterResult(
127+
variables={},
128+
output=[]
129+
),
122130
passed=False,
123131
error_message=f"Unexpected return statement (returned {e.value})"
124132
)
@@ -128,11 +136,14 @@ def run(
128136
execution_results.append(
129137
TestCaseResult(
130138
input_data=test_case.initial_variables,
131-
expected_output={
132-
"variables": test_case.expected_variables,
133-
"output": test_case.expected_output
134-
},
135-
actual_output=None,
139+
expected_output=InterpreterResult(
140+
variables=test_case.expected_variables,
141+
output=test_case.expected_output
142+
),
143+
actual_output=InterpreterResult(
144+
variables={},
145+
output=[]
146+
),
136147
passed=False,
137148
error_message=f"Variable or function not defined: {str(e)}"
138149
)
@@ -142,11 +153,14 @@ def run(
142153
execution_results.append(
143154
TestCaseResult(
144155
input_data=test_case.initial_variables,
145-
expected_output={
146-
"variables": test_case.expected_variables,
147-
"output": test_case.expected_output
148-
},
149-
actual_output=None,
156+
expected_output=InterpreterResult(
157+
variables=test_case.expected_variables,
158+
output=test_case.expected_output
159+
),
160+
actual_output=InterpreterResult(
161+
variables={},
162+
output=[]
163+
),
150164
passed=False,
151165
error_message="Division by zero"
152166
)
@@ -156,11 +170,14 @@ def run(
156170
execution_results.append(
157171
TestCaseResult(
158172
input_data=test_case.initial_variables,
159-
expected_output={
160-
"variables": test_case.expected_variables,
161-
"output": test_case.expected_output
162-
},
163-
actual_output=None,
173+
expected_output=InterpreterResult(
174+
variables=test_case.expected_variables,
175+
output=test_case.expected_output
176+
),
177+
actual_output=InterpreterResult(
178+
variables={},
179+
output=[]
180+
),
164181
passed=False,
165182
error_message=f"Array index error: {str(e)}"
166183
)
@@ -170,11 +187,14 @@ def run(
170187
execution_results.append(
171188
TestCaseResult(
172189
input_data=test_case.initial_variables,
173-
expected_output={
174-
"variables": test_case.expected_variables,
175-
"output": test_case.expected_output
176-
},
177-
actual_output=None,
190+
expected_output=InterpreterResult(
191+
variables=test_case.expected_variables,
192+
output=test_case.expected_output
193+
),
194+
actual_output=InterpreterResult(
195+
variables={},
196+
output=[]
197+
),
178198
passed=False,
179199
error_message=f"Type error: {str(e)}"
180200
)
@@ -184,11 +204,14 @@ def run(
184204
execution_results.append(
185205
TestCaseResult(
186206
input_data=test_case.initial_variables,
187-
expected_output={
188-
"variables": test_case.expected_variables,
189-
"output": test_case.expected_output
190-
},
191-
actual_output=None,
207+
expected_output=InterpreterResult(
208+
variables=test_case.expected_variables,
209+
output=test_case.expected_output
210+
),
211+
actual_output=InterpreterResult(
212+
variables={},
213+
output=[]
214+
),
192215
passed=False,
193216
error_message=f"Runtime error: {str(e)}"
194217
)
@@ -199,11 +222,14 @@ def run(
199222
execution_results.append(
200223
TestCaseResult(
201224
input_data=test_case.initial_variables,
202-
expected_output={
203-
"variables": test_case.expected_variables,
204-
"output": test_case.expected_output
205-
},
206-
actual_output=None,
225+
expected_output=InterpreterResult(
226+
variables=test_case.expected_variables,
227+
output=test_case.expected_output
228+
),
229+
actual_output=InterpreterResult(
230+
variables={},
231+
output=[]
232+
),
207233
passed=False,
208234
error_message=f"{type(e).__name__}: {str(e)}"
209235
)
@@ -212,7 +238,6 @@ def run(
212238
# -----------------------------------
213239
# 3. Aggregate Results
214240
# -----------------------------------
215-
216241
all_passed = all(r.passed for r in execution_results)
217242

218243
# Generate feedback

evaluation_function/analyzer/interpreter.py

Lines changed: 9 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -16,6 +16,8 @@
1616
from typing import Any, Dict, List, Optional, Union
1717
from copy import deepcopy
1818

19+
from evaluation_function.schemas.output_schema import InterpreterResult
20+
1921
from ..schemas.input_schema import RuntimeValue
2022
from ..schemas.ast_nodes import *
2123

@@ -49,12 +51,12 @@ def run(
4951
self,
5052
program: ProgramNode,
5153
initial_variables: Optional[Dict[str, RuntimeValue]] = None,
52-
) -> Dict[str, Any]:
54+
) -> InterpreterResult:
5355
"""
5456
Execute a program with optional initial variables.
5557
5658
Returns:
57-
Dict with 'variables' and 'output' keys
59+
InterpreterResult object with 'variables' and 'output' fields
5860
"""
5961
self.variables = deepcopy(initial_variables) if initial_variables else {}
6062
self.functions = {}
@@ -67,11 +69,11 @@ def run(
6769
# Execute global statements
6870
if program.global_statements:
6971
self.execute_block(program.global_statements)
70-
71-
return {
72-
"variables": deepcopy(self.variables),
73-
"output": list(self.output)
74-
}
72+
var = deepcopy(self.variables)
73+
return InterpreterResult(
74+
variables={} if not var else var,
75+
output=[] if not list(self.output) else self.output
76+
)
7577

7678
# -----------------------------------
7779
# Block Execution

0 commit comments

Comments
 (0)