-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathaggregate.py
More file actions
77 lines (65 loc) · 2.79 KB
/
aggregate.py
File metadata and controls
77 lines (65 loc) · 2.79 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
# -*- coding: utf-8 -*-
import os
import pandas as pd
import numpy as np
# Folder holding one sub-folder per person; each sub-folder name is parsed
# with int() and matched against the 'id' column of the labelled data.
extraction_folder_path = '.\\extractions\\'
# Folder the aggregated CSV is written to.
output_folder_path = '.\\results\\'

# Column order of the aggregated table:
# ID | Label | Total Images | Total Labelled | Labelled fraction | Frames
AGGREGATE_COLUMNS = [
    'id',
    'label',
    'total_images',
    'total_images_labelled',
    'total_images_labelled_percentage',
    'start_frame',
    'end_frame',
]


def summarise_person(dfp, n_total):
    """Reduce the labelled rows of one person to a single majority label.

    Args:
        dfp: DataFrame with the rows of one person. The columns 'frame',
            'label' and 'probability' are read.
        n_total: Number of entries found in the person's extraction folder.

    Returns:
        A tuple ``(label, n_labelled, fraction, start_frame, end_frame)``:

        * ``label`` is 'male' or 'female' (whichever occurs more often), or
          'NA' when there are no rows or none of them is 'male'/'female'.
          On a tie the label owning the single highest 'probability' wins;
          if those are equal too, 'female' is chosen.
        * ``n_labelled`` is the number of rows carrying the winning label
          (0 for 'NA').
        * ``fraction`` is ``n_labelled / n_total`` (0 for 'NA', and 0 when
          ``n_total`` is 0 so that an empty folder cannot divide by zero).
        * ``start_frame`` / ``end_frame`` are the smallest / largest value
          of 'frame', or NaN when ``dfp`` has no rows.
    """
    if dfp.empty:
        # No labels at all: there is no frame range to report either.
        return 'NA', 0, 0, np.nan, np.nan

    start_frame = dfp['frame'].min()
    end_frame = dfp['frame'].max()

    is_male = dfp['label'] == 'male'
    is_female = dfp['label'] == 'female'
    n_male = int(is_male.sum())
    n_female = int(is_female.sum())

    if n_male == 0 and n_female == 0:
        # Rows exist, but none of them carries a usable label.
        return 'NA', 0, 0, start_frame, end_frame

    if n_male == n_female:
        # Tie on counts: fall back to the most confident single prediction.
        best_male = dfp.loc[is_male, 'probability'].max()
        best_female = dfp.loc[is_female, 'probability'].max()
        label = 'male' if best_male > best_female else 'female'
    elif n_male > n_female:
        label = 'male'
    else:
        label = 'female'

    n_labelled = n_male if label == 'male' else n_female
    fraction = n_labelled / n_total if n_total else 0
    return label, n_labelled, fraction, start_frame, end_frame


def aggregate(labels, extraction_dir):
    """Build the per-person summary table.

    Args:
        labels: DataFrame of labelled images; its 'id' column is compared
            with the integer value of each sub-folder name.
        extraction_dir: Folder containing one sub-folder per person.

    Returns:
        DataFrame with the columns listed in ``AGGREGATE_COLUMNS``, one row
        per sub-folder of ``extraction_dir``. People without labels get the
        label 'NA' and NaN start/end frames.

    Raises:
        ValueError: If a sub-folder name cannot be converted with ``int()``.
    """
    # Close the scandir iterator explicitly instead of leaving it to the GC.
    with os.scandir(extraction_dir) as entries:
        person_dirs = [entry.name for entry in entries if entry.is_dir()]

    rows = []
    for folder in person_dirs:
        person_id = int(folder)
        # Every entry in the person's folder counts towards the total.
        n_total = len(os.listdir(os.path.join(extraction_dir, folder)))
        dfp = labels[labels['id'] == person_id]
        if not dfp.empty:
            print('Aggregating {0}'.format(folder))
        label, n_labelled, fraction, start_frame, end_frame = \
            summarise_person(dfp, n_total)
        # One complete row per person keeps all columns the same length and
        # ties each start/end frame to the person it was measured for.
        rows.append((person_id, label, n_total, n_labelled, fraction,
                     start_frame, end_frame))

    return pd.DataFrame(rows, columns=AGGREGATE_COLUMNS)


def main():
    """Read labelled_data.csv, aggregate per person, write aggregated_data.csv."""
    dfc = pd.read_csv("labelled_data.csv")
    dfa = aggregate(dfc, extraction_folder_path)
    # to_csv does not create missing folders.
    os.makedirs(output_folder_path, exist_ok=True)
    dfa.to_csv(os.path.join(output_folder_path, 'aggregated_data.csv'))


if __name__ == "__main__":
    main()