|
14 | 14 | import json |
15 | 15 | import math |
16 | 16 | import pandas as pd |
17 | | -import csv |
18 | 17 |
|
19 | 18 | # Creates a time-anchored MC workflow, positioned within a given run number (as a function of production size etc.) |
20 | 19 |
|
@@ -318,17 +317,28 @@ def exclude_timestamp(ts, orbit, run, filename): |
318 | 317 | if not os.path.isfile(filename): |
319 | 318 | return False |
320 | 319 |
|
321 | | - # Function to detect the delimiter automatically |
322 | | - def detect_delimiter(file_path): |
323 | | - with open(file_path, 'r') as csvfile: |
324 | | - sample = csvfile.read(1024) # Read a small sample of the file |
325 | | - sniffer = csv.Sniffer() |
326 | | - delimiter = sniffer.sniff(sample).delimiter |
327 | | - return delimiter |
328 | | - return ',' # a reasonable default |
329 | | - |
330 | | - # read txt file into a pandas dataframe ---> if this fails catch exception and return |
331 | | - df = pd.read_csv(filename, header=None, names=["Run", "From", "To", "Message"], sep=detect_delimiter(filename)) |
def parse_file(filename):
    """Parse a delimited text file into a list of row dictionaries.

    Every useful line holds three integers followed by an optional
    free-text remainder. Fields may be separated by any run of commas,
    semicolons or whitespace; everything after the third separator is
    kept verbatim as the message. Lines with fewer than three fields, or
    whose first three fields are not integers (blank lines, headers,
    comments), are skipped.

    Args:
        filename: Path of the text file to read.

    Returns:
        A list of dicts, in file order, with integer values under the
        keys "Run", "From" and "To" and a string under "Message" (empty
        when the line has no fourth field).

    Raises:
        OSError: If the file cannot be opened.
    """
    # "\s" already covers tabs, so this matches the same separators as the
    # previous "[,\s;\t]+"; compiled once instead of once per line.
    separator = re.compile(r'[,\s;]+')

    parsed_data = []
    # "utf-8-sig" drops a leading byte-order mark, which would otherwise be
    # glued to the first field and cause the first row to be discarded as
    # non-numeric. errors="replace" keeps a stray non-UTF-8 byte in the
    # free-text message from aborting the whole parse.
    with open(filename, 'r', encoding='utf-8-sig', errors='replace') as file:
        for line in file:
            # Split into at most 4 parts: three numbers + remaining text.
            columns = separator.split(line.strip(), maxsplit=3)

            if len(columns) < 3:
                continue  # Skip lines with insufficient columns

            try:
                num1, num2, num3 = map(int, columns[:3])
            except ValueError:
                continue  # Skip lines whose first three columns are not integers

            comment = columns[3] if len(columns) > 3 else ""
            parsed_data.append({"Run" : num1, "From" : num2, "To" : num3, "Message" : comment})
    return parsed_data
| 338 | + |
| 339 | + data = parse_file(filename) |
| 340 | + # print (data) |
| 341 | + df = pd.DataFrame(data) # convert to data frame for easy handling |
332 | 342 |
|
333 | 343 | # extract data for this run number |
334 | 344 | filtered = df[df['Run'] == run] |
|
0 commit comments