Skip to content

Commit e5fb7c3

Browse files
committed
Change bad-data-interval parsing to account for more requirements
Apparently the separator can differ from line to line and can also appear inside the comment message. Hence, simple CSV parsing does not work.
1 parent 42d7d28 commit e5fb7c3

File tree

1 file changed

+22
-12
lines changed

1 file changed

+22
-12
lines changed

MC/bin/o2dpg_sim_workflow_anchored.py

Lines changed: 22 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,6 @@
1414
import json
1515
import math
1616
import pandas as pd
17-
import csv
1817

1918
# Creates a time anchored MC workflow; positioned within a given run-number (as function of production size etc)
2019

@@ -318,17 +317,28 @@ def exclude_timestamp(ts, orbit, run, filename):
318317
if not os.path.isfile(filename):
319318
return False
320319

321-
# Function to detect the delimiter automatically
322-
def detect_delimiter(file_path):
    """Guess the field delimiter used in a delimited text file.

    Only the first 1024 characters of the file are inspected.

    Args:
        file_path: Path of the file to inspect.

    Returns:
        The delimiter reported by ``csv.Sniffer``, or ``','`` when the
        sniffer cannot determine one (for example for an empty file).
    """
    try:
        with open(file_path, 'r') as csvfile:
            sample = csvfile.read(1024)  # Read a small sample of the file
        return csv.Sniffer().sniff(sample).delimiter
    except csv.Error:
        # The sniffer signals "cannot decide" by raising, so the fallback
        # has to live in the handler to be reachable at all.
        return ','  # a reasonable default
329-
330-
# read txt file into a pandas dataframe ---> if this fails catch exception and return
331-
df = pd.read_csv(filename, header=None, names=["Run", "From", "To", "Message"], sep=detect_delimiter(filename))
320+
def parse_file(filename):
    """Parse a bad-data-interval text file into a list of row dictionaries.

    Each usable line holds three integers followed by an optional free-text
    comment. Fields may be separated by any run of commas, semicolons or
    whitespace, and the separator may differ from line to line. Only the
    first three separators are split on, so the comment keeps any separator
    characters it contains.

    Args:
        filename: Path of the text file to read.

    Returns:
        A list of dicts with the keys "Run", "From", "To" (ints) and
        "Message" (str, empty when the line carries no comment). Lines with
        fewer than three fields, or whose first three fields are not
        integers, are skipped. The list is empty when no line qualifies.
    """
    parsed_data = []
    # "utf-8-sig" drops a leading byte-order mark, which would otherwise make
    # the first number unparsable and silently lose the first record.
    # errors='replace' keeps one undecodable byte in a comment from aborting
    # the whole read.
    with open(filename, 'r', encoding='utf-8-sig', errors='replace') as file:
        for line in file:
            # Split the line into at most 4 parts (three numbers + comment);
            # \s already covers tabs.
            columns = re.split(r'[,;\s]+', line.strip(), maxsplit=3)

            if len(columns) < 3:
                continue  # Skip lines with insufficient columns

            try:
                run, start, end = map(int, columns[:3])
            except ValueError:
                continue  # Skip lines where first three columns are not numeric

            comment = columns[3] if len(columns) > 3 else ""
            parsed_data.append({"Run": run, "From": start, "To": end, "Message": comment})
    return parsed_data
338+
339+
data = parse_file(filename)
340+
# print (data)
341+
df = pd.DataFrame(data) # convert to data frame for easy handling
332342

333343
# extract data for this run number
334344
filtered = df[df['Run'] == run]

0 commit comments

Comments
 (0)