Skip to content

Commit 694d87f

Browse files
committed
updates
1 parent 87347a2 commit 694d87f

File tree

3 files changed

+136
-5
lines changed

3 files changed

+136
-5
lines changed

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,7 @@ local/*
5555
*.csv
5656
*.json
5757
createItemMetadataFromCSV_*
58+
.txt
5859

5960
# Environments
6061
.env

fileListMetadataReconcile.py

Lines changed: 110 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,110 @@
# -*- coding: utf-8 -*-
"""Reconcile the files found under a directory against a metadata CSV.

The metadata CSV must have a ``fileIdentifier`` column.  A file matches a
metadata row when the file name starts with that row's ``fileIdentifier``.

Five CSV reports are written to the current working directory:

* ``collectionfileList.csv``  - every file name with the requested extension
* ``metadataFileList.csv``    - every ``fileIdentifier`` in the metadata CSV
* ``filesNotInMetadata.csv``  - file names matching no metadata row
* ``metadataWithNoFiles.csv`` - full metadata rows that match no file
* ``metadataWithFiles.csv``   - full metadata rows that match at least one file
"""
import csv
import time
import os
import argparse
import sys

# The csv module wants binary files on Python 2 and text files opened with
# newline='' on Python 3; open_csv() hides that difference.
PY2 = sys.version_info[0] < 3


def get_settings(argv=None):
    """Return (directory, fileNameCSV, fileExtension).

    Values come from the command line (``argv``, or ``sys.argv`` when
    ``argv`` is None); any value that is missing or empty is asked for
    interactively, in the order directory, CSV, extension.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('-d', '--directory', help='the directory of the files. optional - if not provided, the script will ask for input')
    parser.add_argument('-f', '--fileNameCSV', help='the metadata CSV. optional - if not provided, the script will ask for input')
    parser.add_argument('-e', '--fileExtension', help='the file extension. optional - if not provided, the script will ask for input')
    args = parser.parse_args(argv)

    try:
        ask = raw_input  # Python 2
    except NameError:
        ask = input  # Python 3

    directory = args.directory or ask('Enter directory (C:/Test/): ')
    fileNameCSV = args.fileNameCSV or ask('Enter metadata CSV: ')
    fileExtension = args.fileExtension or ask('Enter file extension: ')
    return directory, fileNameCSV, fileExtension


def open_csv(path, mode='r'):
    """Open ``path`` for the csv module in mode 'r' or 'w'."""
    if PY2:
        return open(path, 'wb' if mode == 'w' else mode)
    return open(path, mode, newline='')


def format_elapsed(seconds):
    """Format a duration in seconds as ``H:MM:SS`` (fractions truncated)."""
    minutes, secs = divmod(seconds, 60)
    hours, minutes = divmod(minutes, 60)
    return '%d:%02d:%02d' % (hours, minutes, secs)


def list_files(directory, fileExtension):
    """Return the names of all files under ``directory`` ending in ``fileExtension``.

    Names are returned WITH their extension.  The original script called
    ``str.replace`` here and threw the result away, so the extension was
    never actually removed; that no-op is dropped and the effective
    behaviour kept, because prefix matching does not need the extension
    stripped and stripping it would break metadata identifiers that
    include the extension.
    """
    names = []
    for _root, _dirs, files in os.walk(directory, topdown=True):
        names.extend(name for name in files if name.endswith(fileExtension))
    return names


def read_metadata(fileNameCSV):
    """Read the metadata CSV and return ``(fieldnames, rows)``.

    ``fieldnames`` is the header in file order (empty list for an empty
    file) and ``rows`` is a list of dicts as produced by csv.DictReader.
    """
    with open_csv(fileNameCSV) as csvfile:
        reader = csv.DictReader(csvfile)
        rows = list(reader)
        # Use the reader's own header instead of next(reader): next() returns
        # (and consumes) the first DATA row, which silently dropped that row
        # from the reports and lost the column order on Python 2.
        fieldnames = list(reader.fieldnames or [])
    return fieldnames, rows


def reconcile(fileIdentifierList, metadataIdentifierList):
    """Match file names against metadata identifiers by prefix.

    Returns a tuple of three sets:
    ``(filesNotInMetadata, metadataMatches, metadataWithNoFiles)``.
    """
    matchedFiles = set()
    metadataMatches = set()
    # A blank (or missing) identifier is a prefix of every file name and would
    # mark every file as matched, so it is never used for matching; such
    # identifiers therefore end up in metadataWithNoFiles.
    candidates = set(value for value in metadataIdentifierList if value)
    for fileID in fileIdentifierList:
        for metadataID in candidates:
            if fileID.startswith(metadataID):
                matchedFiles.add(fileID)
                metadataMatches.add(metadataID)
    filesNotInMetadata = set(fileIdentifierList) - matchedFiles
    metadataWithNoFiles = set(metadataIdentifierList) - metadataMatches
    return filesNotInMetadata, metadataMatches, metadataWithNoFiles


def rows_for_identifiers(fieldnames, rows, identifiers):
    """Return, as lists in ``fieldnames`` order, the rows whose
    ``fileIdentifier`` is in ``identifiers`` (each row at most once)."""
    return [[row[name] for name in fieldnames]
            for row in rows if row['fileIdentifier'] in identifiers]


def write_csv(path, header, rows):
    """Write ``header`` followed by ``rows`` to ``path`` and close the file."""
    with open_csv(path, 'w') as output:
        writer = csv.writer(output)
        writer.writerow(header)
        writer.writerows(rows)


def main(argv=None):
    """Run the reconciliation and write the five CSV reports."""
    directory, fileNameCSV, fileExtension = get_settings(argv)

    startTime = time.time()
    fileIdentifierList = list_files(directory, fileExtension)
    # Two spaces after the colon reproduce the output of the original
    # Python 2 print statement, which joined its two arguments with a space.
    print('File list creation time:  ' + format_elapsed(time.time() - startTime))

    write_csv('collectionfileList.csv', ['fileName'],
              [[name] for name in fileIdentifierList])

    fieldnames, rows = read_metadata(fileNameCSV)
    metadataIdentifierList = [row['fileIdentifier'] for row in rows]
    write_csv('metadataFileList.csv', ['metadataItemID'],
              [[value] for value in metadataIdentifierList])

    filesNotInMetadata, metadataMatches, metadataWithNoFiles = reconcile(
        fileIdentifierList, metadataIdentifierList)

    # Sorted so the report is stable between runs (a bare set has no order).
    write_csv('filesNotInMetadata.csv', ['fileItemID'],
              [[name] for name in sorted(filesNotInMetadata)])
    write_csv('metadataWithNoFiles.csv', fieldnames,
              rows_for_identifiers(fieldnames, rows, metadataWithNoFiles))
    write_csv('metadataWithFiles.csv', fieldnames,
              rows_for_identifiers(fieldnames, rows, metadataMatches))

    print('Total script run time:  ' + format_elapsed(time.time() - startTime))


if __name__ == '__main__':
    main()

postCollection.py

Lines changed: 25 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,31 @@
66
import os
77
import csv
88
import urllib3
9+
import argparse
10+
# Command-line options.  Every option is optional: a value that is not given
# (or is empty) is requested interactively right after parsing.
parser = argparse.ArgumentParser()
parser.add_argument(
    '-d', '--directory',
    help='the directory of the files. optional - if not provided, the script will ask for input')
parser.add_argument(
    '-e', '--fileExtension',
    help='the file extension. optional - if not provided, the script will ask for input')
parser.add_argument(
    '-i', '--communityHandle',
    help='handle of the community. optional - if not provided, the script will ask for input')
parser.add_argument(
    '-n', '--collectionName',
    help='the name of the collection. optional - if not provided, the script will ask for input')
args = parser.parse_args()

# `or` only evaluates raw_input() when the option is missing or empty, so the
# prompts appear in this order and only for the values still needed.
directory = args.directory or raw_input('Enter directory (C:/Test/): ')
fileExtension = args.fileExtension or raw_input('Enter file extension: ')
communityHandle = args.communityHandle or raw_input('Enter community handle: ')
collectionName = args.collectionName or raw_input('Enter collection name: ')
934

1035
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
1136

@@ -25,11 +50,6 @@
2550
filePath = secrets.filePath
2651
verify = secrets.verify
2752

28-
directory = raw_input('Enter directory name: ')
29-
fileExtension = '.'+raw_input('Enter file extension: ')
30-
communityHandle = raw_input('Enter community handle: ')
31-
collectionName = raw_input('Enter collection name: ')
32-
3353
startTime = time.time()
3454
data = {'email':email,'password':password}
3555
header = {'content-type':'application/json','accept':'application/json'}

0 commit comments

Comments
 (0)