-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathexportCSV.py
More file actions
87 lines (79 loc) · 2.23 KB
/
exportCSV.py
File metadata and controls
87 lines (79 loc) · 2.23 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
import csv
import os
from datetime import datetime

from database import client as db
def _prompt_score(label):
    """Prompt until the user enters a number within [0, 1] and return it.

    ``label`` is the word shown in both the prompt and the error message
    (for example ``'Minimum'``).
    """
    while True:
        try:
            score = float(input(f'Enter {label} score (0-1): '))
        except ValueError:
            # The text entered could not be parsed as a number.
            print(f'Invalid {label} Score, Try Again\n')
            continue
        # Accept the whole closed interval, including 0 itself. The chained
        # comparison is False for NaN, so that is rejected as well.
        if 0 <= score <= 1:
            return score
        print(f'Invalid {label} Score, Try Again\n')


# An empty hostname means "do not filter by host". Surrounding whitespace is
# dropped so that an accidental space does not turn into a filter value.
hostname = input('Enter hostname(optional): ').strip()

# Inclusive bounds for the sentence scores that will be exported.
minScore = _prompt_score('Minimum')
maxScore = _prompt_score('Maximum')
# Aggregation pipeline run against the crawled-page collection: join every
# page with its sentence scores, produce one row per sentence, and keep only
# the rows whose score lies between minScore and maxScore (both inclusive).
pipeline = []

# Optional leading stage: restrict the export to one host when the user
# entered a hostname.
if hostname:
    pipeline.append({
        '$match': {
            'hostname': hostname
        }
    })

pipeline.extend([
    {
        # Attach the SentenceScores documents whose crawledPageId equals the
        # page's _id.
        '$lookup': {
            'from': 'SentenceScores',
            'localField': '_id',
            'foreignField': 'crawledPageId',
            'as': 'SentenceScores'
        }
    },
    {
        # One output document per joined sentence score.
        # NOTE(review): pages without any score are preserved here, but they
        # appear to be removed again by the score $match below -- confirm.
        '$unwind': {
            'path': '$SentenceScores',
            'preserveNullAndEmptyArrays': True
        }
    },
    {
        # Flatten the values of interest into top-level report columns.
        '$addFields': {
            'Webpage': {'$concat': ['https://', '$hostname', '$url']},
            'sentence': '$SentenceScores.text',
            'sentenceScore': '$SentenceScores.score'
        }
    },
    {
        # Keep only sentences inside the requested score range.
        '$match': {
            '$and': [
                {'sentenceScore': {'$gte': minScore}},
                {'sentenceScore': {'$lte': maxScore}}
            ]
        }
    },
    {
        # Drop the fields that should not appear in the CSV.
        '$project': {
            '_id': 0,
            'hostname': 0,
            'url': 0,
            'metaData': 0,
            'SentenceScores': 0,
            'rawHTML': 0
        }
    },
])
# Run the aggregation and write the matching rows to a timestamped CSV file
# inside ./report.
print('Retriving....')

# str(datetime) contains ':' characters, which are not allowed in Windows
# file names, so the timestamp is formatted explicitly with '-' separators.
time = datetime.now().strftime('%Y-%m-%d %H-%M-%S.%f')
report_dir = './report'
file = f'{report_dir}/{hostname} csv-out {time}.csv'

# Make sure the output folder exists before querying, so a missing folder
# cannot make the script fail only after the query has already completed.
os.makedirs(report_dir, exist_ok=True)

results = db.ExplictDetect.crawledPage.aggregate(pipeline, allowDiskUse=True)
print('Retrived')
print('Writing to file....')

# Materialise the cursor: the column set has to be known before the header
# row can be written.
results = list(results)
if results:
    # Union of the keys of all rows, in first-seen order. Taking the keys of
    # the first row only would make DictWriter raise ValueError as soon as a
    # later row carries an additional field; rows lacking a column get an
    # empty cell instead.
    keys = list(dict.fromkeys(key for row in results for key in row))
    # Explicit UTF-8 so non-ASCII sentence text does not depend on the
    # platform's default encoding; newline='' is what the csv module expects.
    with open(file, 'w', newline='', encoding='utf-8') as output_file:
        dw = csv.DictWriter(output_file, keys)
        dw.writeheader()
        dw.writerows(results)
    print(f'CSV file exported at {file}')
else:
    print('No Data found in given conditions')