-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathmean_shift.py
More file actions
105 lines (86 loc) · 3.69 KB
/
mean_shift.py
File metadata and controls
105 lines (86 loc) · 3.69 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
__author__ = 'Jiawei yang'
#reference: J.W.Yang???S. Rahardja, and P. Fr??nti, "Mean-shift outlier detection," Int. Conf. Fuzzy Systems and Data Mining (FSDM), 2018.
import numpy as np
from sklearn.neighbors import NearestNeighbors
from scipy.spatial import distance
class MS:
    """Mean-shift / medoid-shift outlier scoring.

    Every point is repeatedly replaced by the mean (or the medoid) of its
    nearest neighbours.  The outlier score of a point is the Euclidean
    distance between its original position and its shifted position.

    Parameters
    ----------
    data : array-like
        The points to score; must be a 2-D array-like accepted by
        ``NearestNeighbors.fit`` and must support ``data[:]``.
    k : int
        Passed as ``n_neighbors`` to ``NearestNeighbors``.  The first
        neighbour returned for each query is dropped (it is expected to be
        the query point itself), so ``k - 1`` neighbours are aggregated.
    iteration_number : int, optional
        Number of shift passes performed by the ``run*`` methods
        (default 3).
    """

    def __init__(self, data, k, iteration_number=3):
        self.data = data
        self.k = k
        self.iteration_number = iteration_number

    def getDst(self, data, data_shifted):
        """Return the per-point Euclidean distance between two point sets.

        Parameters
        ----------
        data, data_shifted : array-like
            Row ``j`` of ``data`` is compared with row ``j`` of
            ``data_shifted``.

        Returns
        -------
        numpy.ndarray
            One distance per row of ``data`` (empty for empty input).
        """
        original = np.asarray(data)
        if original.size == 0:
            return np.array([])
        # Only the first len(data) shifted rows are compared.
        diff = original - np.asarray(data_shifted)[:len(original)]
        # Flatten each row so 1-D and 2-D inputs are handled alike.
        return np.sqrt(np.sum(diff.reshape(len(diff), -1) ** 2, axis=1))

    def k_nearest_neighbor(self, point, nbrs, points):
        """Return the neighbours of ``point``, excluding the closest one.

        Parameters
        ----------
        point : array-like
            Query point.
        nbrs : object
            Fitted index exposing ``kneighbors(X) -> (distances, indices)``.
        points : sequence
            The points the index was fitted on; indices are looked up here.

        Returns
        -------
        list
            The rows of ``points`` for every returned neighbour but the
            first.
        """
        _, indices = nbrs.kneighbors([point])
        # The first hit is skipped: it is expected to be the query itself.
        return [points[p] for p in indices[0][1:]]

    def get_medoid(self, coords):
        """Return the row of ``coords`` with the smallest total Manhattan
        distance to all rows (the first such row on ties)."""
        coords = np.array(coords)
        cost = distance.cdist(coords, coords, 'cityblock')
        return coords[np.argmin(cost.sum(axis=0))]

    def get_mean(self, coords):
        """Return the coordinate-wise mean of ``coords``."""
        return np.mean(coords, axis=0)

    def _shift(self, points, k, iteration_number, aggregate, force_float):
        """Shift every point to ``aggregate`` of its neighbours.

        Each pass works from a frozen snapshot of the previous pass, so the
        result does not depend on the order in which points are visited.
        """
        shifted = np.array(points)
        if force_float and not np.issubdtype(shifted.dtype, np.inexact):
            # Means are fractional; an integer array would truncate them.
            shifted = shifted.astype(float)
        remaining = iteration_number
        while remaining > 0:
            # A real copy is required: ``shifted[:]`` would only be a view,
            # and the writes below would then alter the fitted data.
            snapshot = shifted.copy()
            nbrs = NearestNeighbors(n_neighbors=k, algorithm='auto').fit(snapshot)
            for i, point in enumerate(snapshot):
                neighbours = self.k_nearest_neighbor(point, nbrs, snapshot)
                shifted[i] = aggregate(neighbours)
            remaining -= 1
        return shifted

    def shift_iterationMean(self, points, k, iteration_number):
        """Run ``iteration_number`` mean-shift passes over ``points``.

        Returns a new floating-point array; ``points`` is not modified.
        """
        return self._shift(points, k, iteration_number, self.get_mean, True)

    def shift_iterationMedoid(self, points, k, iteration_number):
        """Run ``iteration_number`` medoid-shift passes over ``points``.

        Returns a new array; ``points`` is not modified.
        """
        return self._shift(points, k, iteration_number, self.get_medoid, False)

    def _run_shift(self, shift_once):
        """Apply ``shift_once`` to ``self.data`` ``iteration_number`` times,
        one pass per call."""
        result = self.data[:]
        for _ in range(self.iteration_number):
            result = shift_once(result, self.k, 1)
        return result

    def runMean(self):
        """Return the mean-shift outlier score of every point in ``data``."""
        return self.getDst(self.data, self.runMeanShift())

    def runMeanShift(self):
        """Return ``data`` after ``iteration_number`` mean-shift passes."""
        return self._run_shift(self.shift_iterationMean)

    def runMedoidShift(self):
        """Return ``data`` after ``iteration_number`` medoid-shift passes."""
        return self._run_shift(self.shift_iterationMedoid)

    def runMedoid(self):
        """Return the medoid-shift outlier score of every point in ``data``."""
        return self.getDst(self.data, self.runMedoidShift())

    def run(self):
        """Return ``(mean_shift_scores, medoid_shift_scores)`` for ``data``."""
        outlierscore_MOD = self.runMean()
        outlierscore_DOD = self.runMedoid()
        return outlierscore_MOD, outlierscore_DOD