-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathdetect_repeats.py
More file actions
73 lines (57 loc) · 2.5 KB
/
detect_repeats.py
File metadata and controls
73 lines (57 loc) · 2.5 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
import pandas as pd
import os
# --- Repeat-sample analysis -------------------------------------------------
# For every sensor column whose name starts with 'n', flag rows whose value
# is identical to the previous row's value (a sign of stale / duplicated
# samples), then print per-sensor and overall repeat statistics.
csv_path = 'DriveUpload/SynchronizedDataSubject1_etiquetado.csv'
# csv_path = 'DriveUpload/combined_data.csv'
df = pd.read_csv(csv_path)
# Report the file actually loaded (the old message hard-coded a different name).
print(f"Total rows in {csv_path}: {len(df)}")

# 1) Identify only the sensor columns that start with 'n'.
sensor_cols = [c for c in df.columns if c.startswith('n')]

# 2) For each sensor, compare each row to the previous row's sample.
#    `shift()` yields NaN for the first row, and NaN never compares equal,
#    so the first row is automatically False — no manual fix-up needed.
for sensor in sensor_cols:
    df[f'{sensor}_is_repeat'] = df[sensor].eq(df[sensor].shift())

# 3) Count repeats per sensor.
repeat_counts = {
    sensor: int(df[f'{sensor}_is_repeat'].sum())
    for sensor in sensor_cols
}

# 4) Percent of repeats per sensor. There are len(df) - 1 row-to-row
#    intervals; clamp to 1 so a 0- or 1-row file doesn't divide by zero.
n_intervals = max(len(df) - 1, 1)
repeat_percents = {
    sensor: repeat_counts[sensor] / n_intervals
    for sensor in sensor_cols
}

# Per-sensor summary.
for sensor in sensor_cols:
    print(f"{sensor}: {repeat_counts[sensor]} repeats, "
          f"{repeat_percents[sensor]:.2%} of intervals")

# 5) Single "any-n-repeat" column: True where any 'n' sensor repeated.
df['any_n_repeat'] = df[[f'{sensor}_is_repeat' for sensor in sensor_cols]].any(axis=1)
print("Total rows where any 'n' sensor repeated:", int(df['any_n_repeat'].sum()))
print("Percent of rows with any 'n' repeat:", df['any_n_repeat'].mean())
def main():
    """Summarize the timing of consecutive predictions.

    Reads ``DriveUpload/predictions.csv`` (must contain a parseable
    ``timestamp`` column) and prints the average, minimum, and maximum
    interval between consecutive rows plus the total run duration.
    Prints a message and returns early if the file is missing or has
    fewer than two rows.
    """
    # Path to the predictions CSV produced elsewhere in the pipeline.
    csv_path = os.path.join('DriveUpload', 'predictions.csv')
    if not os.path.exists(csv_path):
        print(f"File not found: {csv_path}")
        return

    # Read CSV and parse the timestamp column into datetimes.
    df = pd.read_csv(csv_path, parse_dates=['timestamp'])

    # Seconds between consecutive predictions; the first row's diff is NaN.
    df['delta'] = df['timestamp'].diff().dt.total_seconds()
    deltas = df['delta'].dropna()

    # Fewer than two rows means no intervals: the original code would print
    # NaN statistics (or raise IndexError on an empty frame). Bail out instead.
    if deltas.empty:
        print(f"Not enough rows in {csv_path} to compute intervals.")
        return

    avg_interval = deltas.mean()
    min_interval = deltas.min()
    max_interval = deltas.max()
    # NOTE(review): the trailing "+ 1" looks like an inclusive-duration fudge
    # factor carried over from the original — confirm it is intentional.
    total_duration = (df['timestamp'].iloc[-1] - df['timestamp'].iloc[0]).total_seconds() + 1

    print(f"Average interval: {avg_interval:.6f} seconds")
    print(f"Minimum interval: {min_interval:.6f} seconds")
    print(f"Maximum interval: {max_interval:.6f} seconds")
    print(f"Total test duration: {total_duration:.6f} seconds")


if __name__ == '__main__':
    main()