forked from ShayanTalaei/CHESS
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathinput_file.py
More file actions
65 lines (51 loc) · 2.12 KB
/
input_file.py
File metadata and controls
65 lines (51 loc) · 2.12 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
import os
import json
import re
def parse_log_file(log_file_path):
    """Extract per-step token counts from one log file.

    The log is split into sections by banner lines of the form
    ``#...# <Human|AI> at step <name> #...#``. Within each section the
    first ``The token count is: <n>`` occurrence is read; a section with
    no such line counts as 0 tokens.

    A ``Human`` section always starts a new record whose ``input_length``
    is its token count. An ``AI`` section fills ``output_length`` of the
    immediately preceding record when that record has the same step name
    and its ``output_length`` is still 0; otherwise it starts a new record
    with ``input_length`` 0.

    Args:
        log_file_path: Path of the log file to read.

    Returns:
        A list of dicts with keys ``"step"``, ``"input_length"`` and
        ``"output_length"``, in order of appearance in the log.

    Raises:
        OSError: If the file cannot be opened or read.
    """
    # Decode explicitly as UTF-8 rather than with the locale default, and
    # replace undecodable bytes instead of raising: the patterns below are
    # pure ASCII, so a replacement character can never change what matches.
    with open(log_file_path, 'r', encoding='utf-8', errors='replace') as file:
        log_content = file.read()

    step_pattern = re.compile(r'##############################\s*(Human|AI) at step (.*?)\s*##############################')
    token_pattern = re.compile(r'The token count is:\s*(\d+)')

    steps = []
    headers = list(step_pattern.finditer(log_content))
    for i, header in enumerate(headers):
        role = header.group(1)
        step_name = header.group(2).strip()

        # A section runs from the end of its banner to the start of the
        # next banner (or to the end of the file for the last one).
        start_index = header.end()
        end_index = headers[i + 1].start() if i + 1 < len(headers) else len(log_content)
        section_text = log_content[start_index:end_index]

        token_match = token_pattern.search(section_text)
        tokens = int(token_match.group(1)) if token_match else 0

        if role == "Human":
            steps.append({
                "step": step_name,
                "input_length": tokens,
                "output_length": 0,
            })
            continue

        # role == "AI": attach to the previous record when it is the same
        # step and has no output recorded yet; otherwise record it alone.
        previous = steps[-1] if steps else None
        if (
            previous is not None
            and previous["step"] == step_name
            and previous["output_length"] == 0
        ):
            previous["output_length"] = tokens
        else:
            steps.append({
                "step": step_name,
                "input_length": 0,
                "output_length": tokens,
            })
    return steps
def collect_logs(logs_directory, *, suffix="formula_1.log", log_name="Text2SQLRequest"):
    """Parse every matching log file in a directory.

    Args:
        logs_directory: Directory whose entries are scanned (not recursive).
        suffix: Only entries whose name ends with this string are parsed.
        log_name: Key under which each file's parsed steps are stored.

    Returns:
        A list with one ``{log_name: steps}`` dict per matching file, where
        ``steps`` is the value returned by ``parse_log_file``. Entries are
        ordered by file name.

    Raises:
        OSError: If the directory cannot be listed or a file cannot be read.
    """
    logs_data = []
    # os.listdir returns entries in arbitrary order; sort so the output is
    # reproducible from run to run.
    for log_file in sorted(os.listdir(logs_directory)):
        if not log_file.endswith(suffix):
            continue
        log_file_path = os.path.join(logs_directory, log_file)
        # A directory that happens to carry the suffix cannot be parsed.
        if not os.path.isfile(log_file_path):
            continue
        logs_data.append({log_name: parse_log_file(log_file_path)})
    return logs_data
def main():
    """Collect step data from the hard-coded logs directory and dump it as JSON.

    The result of ``collect_logs`` is written to
    ``./input_file_formula1.json`` with a four-space indent.
    """
    source_dir = './results/dev/CHESS_IR_CG_UT/mixed_dev_1/2025-04-08T12:58:08.154132/logs'
    collected = collect_logs(source_dir)

    destination = './input_file_formula1.json'
    with open(destination, 'w') as sink:
        json.dump(collected, fp=sink, indent=4)
# Run the export only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()