-
Notifications
You must be signed in to change notification settings - Fork 2
Expand file tree
/
Copy pathclass.duplicateFileRemover.php
More file actions
161 lines (135 loc) · 4.21 KB
/
class.duplicateFileRemover.php
File metadata and controls
161 lines (135 loc) · 4.21 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
<?php
/**
* @package DuplicateFileRemover
* @author Johnson Omotosho
* @link http://github.com/brojohnson
* @version 0.0.1
*/
namespace Amantosh;
use Exception;
/**
 * Removes duplicate files from a single directory.
 *
 * The directory is scanned (non-recursively) when the object is constructed.
 * Calling start_process() then compares the files by their SHA-1 digest; for
 * every group of files with identical content the first one (in scandir()
 * order) is kept and the others are deleted.
 */
class DuplicateFileRemover
{
    /**
     * $dir   Directory being processed, exactly as passed by the caller.
     * $files Entry names (not full paths) of the regular entries found in
     *        $dir, keyed by their scandir() position.
     */
    protected $dir, $files;

    /**
     * When true, progress messages are echoed while processing.
     */
    public $showProgress = true;

    /**
     * Memo of SHA-1 digests keyed by full path, so each file is hashed at
     * most once per run instead of once per comparison.
     */
    private $hashes = [];

    /**
     * Constructor.
     *
     * Sets the directory in which duplicates are searched and collects the
     * names of the entries directly inside it. Sub-directories are ignored;
     * call setPath() to scan another directory.
     *
     * @param string $path Path to a valid directory.
     *
     * @throws Exception If $path is not a directory or cannot be read.
     */
    public function __construct(string $path)
    {
        if (!is_dir($path)) {
            throw new Exception("$path is not a valid directory.");
        }

        $this->dir = $path;
        $this->hashes = [];

        $entries = scandir($this->dir);
        if ($entries === false) {
            throw new Exception("$path could not be read.");
        }

        $this->files = array_filter($entries, function ($item) {
            if ($item === '.' || $item === '..') {
                return false;
            }

            // Test the full path: a bare entry name would be resolved
            // against the current working directory, not against $this->dir.
            return !is_dir($this->join_dir($item));
        });
    }

    /**
     * Starts removing duplicates in the configured directory.
     *
     * @return self
     */
    public function start_process(): self
    {
        // Files may have changed since the previous run; start from a clean memo.
        $this->hashes = [];

        array_walk($this->files, [$this, 'remove_duplicates']);

        return $this;
    }

    /**
     * Sets a new directory for the class to scan.
     *
     * @param string $path The new directory path.
     *
     * @return self
     *
     * @throws Exception If $path is not a directory or cannot be read.
     */
    public function setPath(string $path): self
    {
        $this->__construct($path);

        return $this;
    }

    /**
     * Echoes a progress message unless progress output is disabled.
     *
     * @param string $msg Message to print.
     *
     * @return void
     */
    private function showProgress(string $msg): void
    {
        if (!$this->showProgress) {
            return;
        }

        echo $msg;
    }

    /**
     * Prefixes an entry name with the configured directory.
     *
     * @param string $file The entry name.
     *
     * @return string
     */
    private function join_dir(string $file): string
    {
        return $this->dir . DIRECTORY_SEPARATOR . $file;
    }

    /**
     * Returns the SHA-1 digest of a file, hashing it at most once.
     *
     * @param string $path Full path of the file.
     *
     * @return string|null The hex digest, or null when the path is not a
     *                     readable regular file or hashing failed.
     */
    private function hash_of(string $path): ?string
    {
        // Checked before the memo so a file deleted meanwhile is never reported.
        if (!is_file($path) || !is_readable($path)) {
            return null;
        }

        if (!array_key_exists($path, $this->hashes)) {
            $digest = sha1_file($path);
            $this->hashes[$path] = ($digest === false) ? null : $digest;
        }

        return $this->hashes[$path];
    }

    /**
     * Deletes the given files from the configured directory.
     *
     * Names that no longer refer to an existing regular file are skipped.
     *
     * @param array $file Entry names (relative to the directory) to delete.
     *
     * @return void
     */
    private function delete_all_files(array $file): void
    {
        foreach ($file as $filename) {
            $target = $this->join_dir($filename);

            if (!is_file($target)) {
                continue;
            }

            $this->showProgress("\tDeleting $target\n");
            unlink($target);
            unset($this->hashes[$target]);
        }
    }

    /**
     * Deletes every later copy of the given file.
     *
     * The file is compared with every scanned file by SHA-1 digest. When
     * more than one file matches, the first match is kept and the remaining
     * ones are deleted.
     *
     * @param string $item Entry name of the file to compare.
     *
     * @return void
     */
    private function remove_duplicates($item): void
    {
        $this->showProgress("\nReading $item\n");

        // Null when the file is already gone (e.g. removed as a duplicate
        // of an earlier file) or cannot be read; nothing can match it then.
        $item_hash = $this->hash_of($this->join_dir($item));

        $matches = array_filter($this->files, function ($cur_file) use ($item_hash) {
            $this->showProgress("\tComparing $cur_file\n");

            if ($item_hash === null) {
                return false;
            }

            // Strict comparison: a loose "==" treats two failed hashes
            // (false == false) and numeric-looking digests ("0e1.." == "0e2..")
            // as equal, which would delete files that are not duplicates.
            return $this->hash_of($this->join_dir($cur_file)) === $item_hash;
        });

        // More than one match means real duplicates: keep the first, drop the rest.
        if (count($matches) > 1) {
            array_shift($matches);
            $this->delete_all_files($matches);
        }
    }
}