Skip to content

Commit 06adad5

Browse files
sawenzel authored and alcaliva committed
Change bad-data-interval parsing to account for more requirements
Apparently the separator can change from line to line and can also appear inside the comment message. Hence, simple CSV parsing does not work. (cherry picked from commit e5fb7c3)
1 parent 3bcef60 commit 06adad5

File tree

1 file changed

+22
-12
lines changed

1 file changed

+22
-12
lines changed

MC/bin/o2dpg_sim_workflow_anchored.py

Lines changed: 22 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -14,7 +14,6 @@
1414
import json
1515
import math
1616
import pandas as pd
17-
import csv
1817

1918
# Creates a time anchored MC workflow; positioned within a given run-number (as function of production size etc)
2019

@@ -318,17 +317,28 @@ def exclude_timestamp(ts, orbit, run, filename):
318317
if not os.path.isfile(filename):
319318
return False
320319

321-
# Function to detect the delimiter automatically
322-
def detect_delimiter(file_path):
    """Guess the column delimiter of a text file from a small leading sample.

    Args:
        file_path: Path of the text file to inspect.

    Returns:
        The delimiter reported by ``csv.Sniffer`` for the first 1024
        characters of the file, or ``','`` when the sniffer cannot determine
        one (for example because the file is empty).
    """
    with open(file_path, 'r') as csvfile:
        sample = csvfile.read(1024)  # a small sample is enough for sniffing

    try:
        return csv.Sniffer().sniff(sample).delimiter
    except csv.Error:
        # The sniffer raises instead of returning None when it cannot decide;
        # without this handler the default below could never be reached.
        return ','  # a reasonable default
329-
330-
# read txt file into a pandas dataframe ---> if this fails catch exception and return
331-
df = pd.read_csv(filename, header=None, names=["Run", "From", "To", "Message"], sep=detect_delimiter(filename))
320+
def parse_file(filename):
    """Parse a bad-data-interval text file into a list of row dictionaries.

    Each usable line holds three integers followed by an optional free-text
    comment. The separator between fields may be any run of commas,
    semicolons or whitespace and may differ from line to line. It may also
    occur inside the comment, which is why every line is split at most three
    times and the remainder is kept verbatim as the comment.

    Args:
        filename: Path of the text file to read.

    Returns:
        A list of dicts with the keys "Run", "From" and "To" (ints, taken
        from the first three fields in that order) and "Message" (the rest
        of the line, or "" when there is none). Lines with fewer than three
        fields, or with a non-integer among the first three fields, are
        skipped.
    """
    # "\s" already matches tabs, so the class only needs ",", ";" and "\s".
    separator = re.compile(r'[,;\s]+')
    parsed_data = []

    # The comment is free text: decode leniently so that a stray non-UTF-8
    # byte in it cannot abort parsing of the numeric columns.
    with open(filename, 'r', encoding='utf-8', errors='replace') as file:
        for line in file:
            # At most 3 splits -> first three numbers + untouched comment.
            columns = separator.split(line.strip(), maxsplit=3)
            if len(columns) < 3:
                continue  # skip lines with insufficient columns

            try:
                run, start, end = map(int, columns[:3])
            except ValueError:
                continue  # skip lines whose first three columns are not integers

            comment = columns[3] if len(columns) > 3 else ""
            parsed_data.append({"Run": run, "From": start, "To": end, "Message": comment})

    return parsed_data
338+
339+
data = parse_file(filename)
340+
# print (data)
341+
df = pd.DataFrame(data) # convert to data frame for easy handling
332342

333343
# extract data for this run number
334344
filtered = df[df['Run'] == run]

0 commit comments

Comments
 (0)