-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathrun_34k_audit2_next.py
More file actions
85 lines (67 loc) · 3.1 KB
/
run_34k_audit2_next.py
File metadata and controls
85 lines (67 loc) · 3.1 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
import pandas as pd
import time
import os
import sys
from google.colab import drive
# 1. Mount Drive
# Attach Google Drive to this Colab runtime at /content/drive.
drive.mount('/content/drive')

# 2. Setup Pathing
# Put the project's Drive folder on the import path (presumably so the custom
# modules imported below can be resolved from there -- confirm their location).
DRIVE_MODULES_FOLDER = '34Batch'
MODULES_FULL_PATH = os.path.join('/content/drive/MyDrive', DRIVE_MODULES_FOLDER)
# Register the folder only once, so re-running the cell does not add duplicates.
if not sys.path.count(MODULES_FULL_PATH):
    sys.path.append(MODULES_FULL_PATH)
# 3. Import Custom Functions
# We add verify_coding to our imports (we will create this script next)
from coding_logic_34 import code_transcript
from verifier_logic import verify_coding
from preprocessing_util import clean_raw_text
# --- CONFIGURATION ---
# Row window handled by this run: BATCH_SIZE data rows, after skipping the
# first START_ROW data rows of the input file.
START_ROW: int = 0
BATCH_SIZE: int = 50  # We can set this higher now because each call is atomic
# A checkpoint CSV is written each time this many rows have been processed.
SAVE_INTERVAL: int = 5
# Source CSV; the run reads its 'StudyID' and 'OriginalTranscript' columns.
INPUT_FILE: str = '/content/drive/MyDrive/34Batch/MasterList_Final.csv'
# Destination CSV, named after the row window above.
OUTPUT_FILE: str = f'/content/drive/MyDrive/34Batch/Atomic_Coded_{START_ROW}_to_{START_ROW + BATCH_SIZE}.csv'
def run_atomic_multi_agent_process():
    """Run the code -> verify -> re-code pipeline over one batch and save it as CSV.

    Reads up to ``BATCH_SIZE`` data rows from ``INPUT_FILE``, after skipping the
    first ``START_ROW`` data rows. For each row, ``code_transcript`` produces a
    coding, ``verify_coding`` audits it, and a rejected coding is re-coded once
    with the verifier's feedback. The re-coded output is not verified again.

    One record per row is written to ``OUTPUT_FILE``: as a checkpoint every
    ``SAVE_INTERVAL`` processed rows, once more when the batch finishes, and
    also when the loop is interrupted after at least one row was processed.
    A row whose processing raises is recorded with ``Final_Code`` set to
    ``"ERROR"`` and the exception text in ``AI_Thoughts``; all records share
    the same columns.

    Returns:
        None. If the input file cannot be loaded, the error is printed and
        nothing is written.
    """
    print(f"🚀 Starting Atomic Multi-Agent Process: Rows {START_ROW} to {START_ROW + BATCH_SIZE}")

    # Load slice: file line 0 is the header, so skipping lines 1..START_ROW
    # drops exactly the first START_ROW data rows.
    try:
        df = pd.read_csv(INPUT_FILE, skiprows=range(1, START_ROW + 1), nrows=BATCH_SIZE)
    except Exception as e:
        print(f"❌ Error: {e}")
        return

    # Pinned column set: success and error records share one schema, so the
    # CSV layout does not depend on which kind of row happens to come first.
    columns = [
        'StudyID',
        'OriginalTranscript',
        'Final_Code',
        'Audit_Feedback',
        'AI_Thoughts',
        'Processed_At',
    ]
    results = []

    def save_results():
        """Rewrite OUTPUT_FILE with every record collected so far."""
        pd.DataFrame(results, columns=columns).to_csv(OUTPUT_FILE, index=False)

    try:
        # Count processed rows explicitly; checkpointing must not depend on
        # the DataFrame's index labels.
        for processed, (_, row) in enumerate(df.iterrows(), start=1):
            study_id = row['StudyID']
            transcript_text = row['OriginalTranscript']

            try:
                # ATOMIC STEP 1: INITIAL CODING
                ai_output, thoughts = code_transcript(transcript_text)

                # ATOMIC STEP 2: VERIFICATION (The "Glass Box" Audit)
                # The verifier sees the transcript AND the coder's output.
                is_valid, feedback = verify_coding(transcript_text, ai_output)

                # ATOMIC STEP 3: RE-CODING (If Verifier finds a logic error)
                if not is_valid:
                    print(f"⚠️ Verifier flagged StudyID {study_id}. Retrying with feedback...")
                    # One-time correction: the feedback goes back to the coder.
                    ai_output, thoughts = code_transcript(transcript_text, feedback=feedback)

                results.append({
                    'StudyID': study_id,
                    'OriginalTranscript': transcript_text,
                    'Final_Code': ai_output,
                    'Audit_Feedback': feedback if not is_valid else "PASS",
                    'AI_Thoughts': thoughts,
                    'Processed_At': time.strftime("%Y-%m-%d %H:%M:%S"),
                })
                print(f"✅ Verified & Saved StudyID {study_id}")
            except Exception as e:
                # Best effort: one bad row must not abort the whole batch.
                print(f"⚠️ Critical Failure on {study_id}: {e}")
                results.append({
                    'StudyID': study_id,
                    'OriginalTranscript': transcript_text,
                    'Final_Code': "ERROR",
                    'Audit_Feedback': None,
                    'AI_Thoughts': str(e),
                    'Processed_At': time.strftime("%Y-%m-%d %H:%M:%S"),
                })

            # Save checkpoint
            if processed % SAVE_INTERVAL == 0:
                save_results()
                print(f"💾 Checkpoint: {processed} rows.")

            time.sleep(2)  # Keeping it polite for the API
    except BaseException:
        # Interrupted (e.g. KeyboardInterrupt) or failed outside the per-row
        # handler: persist what was already processed, then propagate.
        # Skipped when nothing was processed, so an existing output file is
        # not overwritten with an empty one.
        if results:
            save_results()
        raise

    save_results()
    print(f"🏁 Process Complete. Results: {OUTPUT_FILE}")
# Unguarded entry point: the batch starts as soon as this file is executed
# (or imported), since there is no `if __name__ == "__main__":` check.
run_atomic_multi_agent_process()