Commit fa69f32

Improvements to ThinCompilationsDatabase.py
* introduce command line arguments to make things configurable
* exclude files from database based on regular expression
* be able to give changeset to tool via command line
* other cleanup and code improvements (error handling, logging, ...)
1 parent 084efcd commit fa69f32
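
As a rough illustration of the new interface (not part of the commit; the git range, paths, and the exclude pattern below are assumptions), the changeset could be fed to the tool like this:

# Hypothetical driver sketch: collect changed files from git and pass them
# to ThinCompilationsDatabase.py via the command line options added here.
import subprocess

changed = subprocess.check_output(['git', 'diff', '--name-only', 'HEAD~1']).split()
subprocess.check_call(['python', 'utility/ThinCompilationsDatabase.py',
                       '-use-files', ':'.join(changed),       # ':' separated changeset
                       '-exclude-files', '.*\.pb\.cc',         # assumed exclude regex
                       '-o', 'thinned_compile_commands.json',
                       '--verbose', '1'])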

File tree

1 file changed (+81 −47 lines)

utility/ThinCompilationsDatabase.py

mode change: 100644 → 100755
Lines changed: 81 additions & 47 deletions
@@ -3,12 +3,13 @@
 # A python script with the goal
 # to transform a (large) compilations database
 # to a smaller compilations database, given a list of source files
-# that have changed (currently only supporting c++ source and header files)
+# that have changed (currently only supporting c++ source and header files).
 # This should be useful to reduce the time spent in static code analysis by only
-# checking files that have changed, or are influenced by a change
+# checking files that have changed, or are influenced by a change.
 #
 # First version: Sandro Wenzel (June 2017)
 
+import argparse
 import json
 import os
 import subprocess
@@ -20,46 +21,55 @@
 import time
 import sys
 
-# this is a list of changed files we get from git (or the pull request)
-listofchangedfiles=['O2Device.cxx', 'CalDet.h', 'CalDet.cxx']
-checkall=False
+verbosity=0
+
+def parseArgs():
+    """ Setup + Parse arguments from command line; return a parse object """
+    parser = argparse.ArgumentParser(description='Runs over all entries in a '
+                                     'compilation database. Produces a filtered/thinned version of it.')
+    parser.add_argument('-j', type=int, default=0,
+                        help='Number of threads to be used for processing. In any case,'
+                        'not more that the number of available CPU threads will be used (which is the default).')
+    parser.add_argument('-use-files', default=None,
+                        help='\':\' separated file list used to thin out the database. Usually a list of header and source files.'
+                        '\n If no files are given, the default behaviour is to run over the complete database and filter out'
+                        'only ROOT dictionary files and Protobuf generated files.')
+    parser.add_argument('-exclude-files', default=None,
+                        help='A regular expression telling which files to exclude from the database. Takes precedence over \'-use-files\'.')
+    parser.add_argument('-o', default='thinned_compile_commands.json',
+                        help='Filename of output compilations database. [default: thinned_compile_commands.json]')
+    parser.add_argument('--verbose', type=int, default=0,
+                        help='Display verbose information about what is going on')
+    return parser.parse_args()
+
+def verboseLog(string, level=0):
+    global verbosity
+    if verbosity > 0:
+        print string
+
+def getListOfChangedFiles(colonseparatedfilepaths):
+    """ processes the argument '-use-files' and returns a python list of filenames """
+    return colonseparatedfilepaths.split(":")
+
+def makeInvalidClosure(args):
+    regex = None
+    if args.exclude_files:
+        regex=re.compile(args.exclude_files)
+    def f(filename):
+        return regex.match(filename) is not None if regex else None
+    return f
 
 def isHeaderFile(filename):
     # make this more general
     expression=".*\.h"
     # make this more efficient by compiling the expression
-    result=re.match(expression, filename)
-    if not result == None:
-        return True
-    return False
+    return re.match(expression, filename) is not None
 
 def isSourceFile(filename):
     # make this more general
     expression=".*\.cxx"
     # make this more efficient by compiling the expression
-    result=re.match(expression, filename)
-    if not result == None:
-        return True
-    return False
-
-def isROOTDictionaryFile(filename):
-    expression=".*G\_\_.*\.cxx"
-    # make this more efficient by compiling the expression
-    result=re.match(expression, filename)
-    if not result == None:
-        return True
-    return False
-
-def isProtoBuffFile(filename):
-    expression=".*\.pb\.cc"
-    # make this more efficient by compiling the expression
-    result=re.match(expression, filename)
-    if not result == None:
-        return True
-    return False
-
-def isInvalid(filename):
-    return isROOTDictionaryFile(filename) or isProtoBuffFile(filename)
+    return re.match(expression, filename) is not None
 
 # modifies a compilation command by appending -MM (and removing the -o flag)
 # in order to retrieve the header dependencies
@@ -84,10 +94,7 @@ def modifyCompileCommand(command):
 def matchesHeader(line, header):
     expression=".*\.h"
     # make this more efficient by compiling the expression
-    result=re.match(expression, line)
-    if not result == None:
-        return True
-    return False
+    return re.match(expression, line) is not None
 
 def queryListOfHeaders(command):
     proc=subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
@@ -99,7 +106,7 @@ def queryListOfHeaders(command):
 
 # service that processes one item in the queue
 def processItem(keepalive, changedheaderlist, queue, outqueue):
-    while len(keepalive)>0:
+    while len(keepalive) > 0:
         try:
             # this operation is blocking for at most 0.5 seconds
             # sad hack to be able to check whether we want this thread to be kept alive
@@ -111,6 +118,7 @@ def processItem(keepalive, changedheaderlist, queue, outqueue):
                 expression=".*"+header
                 matches = re.match(expression, include)
                 if not matches == None:
+                    verboseLog("Adding " + entry['file'] + " because of a dependence on a modified header")
                     outqueue.put(entry)
                     continue
 
@@ -128,9 +136,27 @@ def reportProgress(keepalive, queue, q2):
 # THE MAIN FUNCTION
 #
 def main():
+    checkall=False
+    listofchangedfiles=[]
+    args = parseArgs()
+    global verbosity
+    verbosity = args.verbose
+    if not args.use_files:
+        verboseLog("no changeset given ... putting all (modulo the exclude set)")
+        checkall=True
+    else:
+        listofchangedfiles=getListOfChangedFiles(args.use_files)
+
+    #setup the isInvalid (closure) function
+    isInvalid=makeInvalidClosure(args)
 
     #open the compilations database
-    file=open('compile_commands.json').read()
+    try:
+        file=open('compile_commands.json').read()
+    except IOError:
+        print "Problem opening the compilation database (file not found)"
+        sys.exit(1)
+
     #convert json to dict
     data=json.loads(file);
 
@@ -143,13 +169,18 @@ def main():
         elif isSourceFile(file):
             changedsourcefilelist.append(file)
 
-    # make a queue
+    # make input/output queues for multithreaded processing
     inputqueue = Queue.Queue()
     outputqueue = Queue.Queue()
     keepAlive=['alive']
 
     # make some servicing threads
     max_task = multiprocessing.cpu_count()
+    if args.j:
+        if args.j>0:
+            max_task = max(max_task,j)
+
+    verboseLog("processing with " + str(max_task) + " threads.")
     for _ in range(max_task):
         t = threading.Thread(target=processItem, args=(keepAlive,changedheaderlist,inputqueue,outputqueue))
         t.deamon=True
@@ -162,38 +193,41 @@ def main():
 
     outputdict=[]
     #scan through compile database and filter against files
-    print "Processing " + str(len(data)) + " items "
+    verboseLog("Processing " + str(len(data)) + " items.")
     for entry in data:
         filename=entry['file']
         basename=os.path.basename(filename)
 
         # check if invalid anyway
         if (isInvalid(basename)):
+            verboseLog("Excluding " + basename + " because of exclude filter")
             continue
 
         # check if this entry is part of the changed source file list
         # if yes, continue directly
         if checkall==True or (basename in changedsourcefilelist):
+            verboseLog("Adding " + entry['file'] + " because of presence in modify list (or lack thereof)")
             outputqueue.put(entry)
             continue
-
-        # otherwise check if this source file is influenced by some changed header file
-        # TODO: if the header does not contain a template, it might be enough
-        # to only add one single source files that depends on it????
-        inputqueue.put(entry)
+
+        if len(changedheaderlist) > 0:
+            # otherwise check if this source file is influenced by some changed header file
+            # TODO: if the header does not contain a template, it might be enough
+            # to only add one single source files that depends on it????
+            inputqueue.put(entry)
 
     # wait on the queue --> wait until queue is completely empty
     inputqueue.join()
     # now we can shut down the deamon threads
     keepAlive[:]=[]
 
     #put outputqueue into outputdict
-    while outputqueue.qsize()>0:
+    while outputqueue.qsize() > 0:
         outputdict.append(outputqueue.get(False))
 
     # write result dictionary to json
     outjson = json.dumps(outputdict, sort_keys=True, indent=4, separators=(',', ': '))
-    with open("thinned_compile_commands.json",'w') as fp:
+    with open(args.o,'w') as fp:
         fp.write(outjson)
 
     return
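
For context, the entries being filtered above follow the standard JSON compilation database layout (directory/command/file keys); a minimal sketch of one such entry and of the basename match used in the loop, with invented paths and flags:

# Sketch only: what a single compile_commands.json entry looks like after json.loads.
# Paths and compiler flags are invented for illustration.
import os

entry = {
    "directory": "/build/O2",
    "command": "c++ -I/src/O2/include -c /src/O2/Detectors/TPC/base/src/CalDet.cxx -o CalDet.cxx.o",
    "file": "/src/O2/Detectors/TPC/base/src/CalDet.cxx"
}
print os.path.basename(entry['file'])   # prints 'CalDet.cxx', the name compared against the changeset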
