# local_search.py
import pandas as pd
import math
def measure_cp(arr, boundaries):
    """Return the sum of squared bucket-size fractions of a partition.

    ``boundaries`` lists the bucket edges: bucket k runs from
    ``boundaries[k]`` to ``boundaries[k + 1]``.  Every bucket contributes
    ``(bucket_length / len(arr)) ** 2`` to the result.  Only the length of
    ``arr`` is used, not its contents.
    """
    total_items = len(arr)
    score = 0
    # Walk consecutive (start, end) edge pairs rather than indexing by position.
    for start, end in zip(boundaries, boundaries[1:]):
        fraction = (end - start) / total_items
        score += fraction ** 2
    return score
def measure_sf(arr, boundaries):
    """Return the relative gap between the two labels' size-weighted scores.

    For each label (0 and 1) the score is the sum over buckets of
    ``count_in_bucket * bucket_length / (label_total * len(arr))``.  The
    result is ``max(score_0, score_1) / min(score_0, score_1) - 1``, so a
    value of 0 means both labels have the same score.

    Args:
        arr: Sequence of labels; it must support ``count`` and slicing.
            Only the values 0 and 1 are counted.
        boundaries: Bucket edges; bucket k is
            ``arr[boundaries[k]:boundaries[k + 1]]``.

    Returns:
        The ratio gap described above.  When the smaller score is zero
        (for example because one label never occurs in ``arr``) the ratio
        is unbounded, and ``math.inf`` is returned instead of raising
        ``ZeroDivisionError``, so a caller using the value as a constraint
        simply rejects that partition.
    """
    n = len(arr)
    total_0 = arr.count(0)
    total_1 = arr.count(1)
    sf_0 = 0
    sf_1 = 0
    for start, end in zip(boundaries, boundaries[1:]):
        bucket = arr[start:end]
        bucket_length = end - start
        # A label that never occurs contributes nothing; skipping it avoids
        # dividing by its zero total.
        if total_0:
            sf_0 += (bucket.count(0) * bucket_length) / (total_0 * n)
        if total_1:
            sf_1 += (bucket.count(1) * bucket_length) / (total_1 * n)
    smaller = min(sf_0, sf_1)
    if smaller == 0:
        return math.inf
    return max(sf_0, sf_1) / smaller - 1
def measure_pf(arr, boundaries):
    """Return the relative gap between the two labels' concentration scores.

    For each label (0 and 1) the score is the sum over buckets of
    ``(count_in_bucket / label_total) ** 2``.  The result is
    ``max(score_0, score_1) / min(score_0, score_1) - 1``, so a value of 0
    means both labels have the same score.

    Args:
        arr: Sequence of labels; it must support ``count`` and slicing.
            Only the values 0 and 1 are counted.
        boundaries: Bucket edges; bucket k is
            ``arr[boundaries[k]:boundaries[k + 1]]``.

    Returns:
        The ratio gap described above.  When the smaller score is zero
        (for example because one label never occurs in ``arr``) the ratio
        is unbounded, and ``math.inf`` is returned instead of raising
        ``ZeroDivisionError``, so a caller minimising the value never
        prefers that partition.
    """
    total_0 = arr.count(0)
    total_1 = arr.count(1)
    pf_0 = 0
    pf_1 = 0
    for start, end in zip(boundaries, boundaries[1:]):
        bucket = arr[start:end]
        # A label that never occurs contributes nothing; skipping it avoids
        # dividing by its zero total.
        if total_0:
            pf_0 += (bucket.count(0) / total_0) ** 2
        if total_1:
            pf_1 += (bucket.count(1) / total_1) ** 2
    smaller = min(pf_0, pf_1)
    if smaller == 0:
        return math.inf
    return max(pf_0, pf_1) / smaller - 1
# Greedy local search over bucket boundaries: starting from equal-width
# buckets, repeatedly apply the single one-position boundary shift that gives
# the lowest measure_pf value while keeping measure_sf within [sf_lb, sf_ub]
# and measure_cp at or below cp_ub.  Stops when no shift improves measure_pf
# or after max_iter rounds.

# NOTE(review): pd.read_pickle unpickles arbitrary Python objects, so only
# point this at a trusted file.  The measure_* helpers call ``arr.count(...)``
# and slice ``arr``; presumably the pickle holds a plain list of 0/1 labels
# -- TODO confirm.
arr = pd.read_pickle(r'ordering_diabetes.pkl')

m = 100  # number of buckets
n = len(arr)
bucket_size = n // m
# m equally spaced left edges plus the right end n; the last bucket absorbs
# the remainder when n is not a multiple of m.
boundaries = [i * bucket_size for i in range(m)]
boundaries.append(n)

max_iter = 1000
sf_lb = 0
sf_ub = 0.05
cp_ub = 0.1

# Per-iteration history of the three measures.
F_total = []
S_total = []
C_total = []

for _ in range(max_iter):
    F = measure_pf(arr, boundaries)
    S = measure_sf(arr, boundaries)
    C = measure_cp(arr, boundaries)
    print(F, S)
    F_total.append(F)
    S_total.append(S)
    C_total.append(C)

    best_F = math.inf
    best_j = None
    best_step = 0
    # Only interior edges may move: boundaries[0] and boundaries[-1] are the
    # ends of the array, and shifting them would drop elements from every
    # bucket (or produce a negative slice start) while still being counted
    # in the bucket lengths.
    for j in range(1, len(boundaries) - 1):
        current = boundaries[j]
        # Try the left shift before the right shift; with the strict "<"
        # below, ties keep the earlier candidate.
        for step in (-1, 1):
            candidate = current + step
            # Never let an edge cross its neighbours, which would give a
            # bucket of negative length.
            if not boundaries[j - 1] <= candidate <= boundaries[j + 1]:
                continue
            boundaries[j] = candidate
            F_ = measure_pf(arr, boundaries)
            S_ = measure_sf(arr, boundaries)
            C_ = measure_cp(arr, boundaries)
            if F_ < best_F and sf_lb <= S_ <= sf_ub and C_ <= cp_ub:
                best_F = F_
                best_j = j
                best_step = step
        # Restore the edge before evaluating the next one.
        boundaries[j] = current

    if best_j is not None and best_F < F:
        boundaries[best_j] += best_step
    else:
        break

# Report every 10th recorded iteration (at most 9 samples), sized from the
# actual history so a short run cannot index past the end of the lists.
index = list(range(1, len(F_total) + 1))
sample = range(0, len(F_total), 10)[:9]
print([index[k] for k in sample])
print([F_total[k] for k in sample])
print([S_total[k] for k in sample])
print([C_total[k] for k in sample])