33# A python script with the goal
44# to transform a (large) compilations database
55# to a smaller compilations database, given a list of source files
6- # that have changed (currently only supporting c++ source and header files)
6+ # that have changed (currently only supporting c++ source and header files).
77# This should be useful to reduce the time spent in static code analysis by only
8- # checking files that have changed, or are influenced by a change
8+ # checking files that have changed, or are influenced by a change.
99#
1010# First version: Sandro Wenzel (June 2017)
1111
12+ import argparse
1213import json
1314import os
1415import subprocess
2021import time
2122import sys
2223
# Module-wide verbosity level read by verboseLog(); main() overwrites it
# with the value of the --verbose command line option.
verbosity = 0
25+
def parseArgs():
    """Set up the command line interface and parse the arguments.

    Recognised options:
      -j              number of worker threads (int, default 0)
      -use-files      ':' separated list of changed files (default None)
      -exclude-files  regular expression of files to exclude (default None)
      -o              output file name (default thinned_compile_commands.json)
      --verbose       verbosity level (int, default 0)

    Returns the argparse namespace produced by parse_args(), which reads
    sys.argv and exits the process on invalid input or -h.
    """
    parser = argparse.ArgumentParser(
        description='Runs over all entries in a '
                    'compilation database. Produces a filtered/thinned version of it.')
    # NOTE: adjacent string literals are concatenated verbatim, so every
    # fragment that continues a sentence must carry its own separating space.
    parser.add_argument('-j', type=int, default=0,
                        help='Number of threads to be used for processing. In any case, '
                             'not more than the number of available CPU threads will be used '
                             '(which is the default).')
    parser.add_argument('-use-files', default=None,
                        help='\':\' separated file list used to thin out the database. '
                             'Usually a list of header and source files.'
                             '\n If no files are given, the default behaviour is to run over '
                             'the complete database and filter out '
                             'only ROOT dictionary files and Protobuf generated files.')
    parser.add_argument('-exclude-files', default=None,
                        help='A regular expression telling which files to exclude from the '
                             'database. Takes precedence over \'-use-files\'.')
    parser.add_argument('-o', default='thinned_compile_commands.json',
                        help='Filename of output compilations database. '
                             '[default: thinned_compile_commands.json]')
    parser.add_argument('--verbose', type=int, default=0,
                        help='Display verbose information about what is going on')
    return parser.parse_args()
44+
def verboseLog(string, level=0):
    """Print `string` if the module-level `verbosity` is greater than `level`.

    string: the message to print.
    level:  minimum verbosity that must be exceeded for the message to be
            shown; with the default of 0 the message appears whenever
            verbosity > 0.
    """
    # Reading a module global needs no `global` declaration. The
    # parenthesised single-argument form of print behaves the same under
    # Python 2 and Python 3.
    if verbosity > level:
        print(string)
49+
def getListOfChangedFiles(colonseparatedfilepaths):
    """Split the value of the '-use-files' option into a list of file names.

    The input is one string in which the individual paths are separated
    by ':'; the pieces are returned in their original order.
    """
    separator = ":"
    filenames = colonseparatedfilepaths.split(separator)
    return filenames
53+
def makeInvalidClosure(args):
    """Build the predicate that decides whether a file is excluded.

    args: parsed command line namespace; only `args.exclude_files` (a
          regular expression string, or None/empty) is read.

    Returns a function f(filename) -> bool that is True when the exclude
    expression matches `filename`. Without an exclude expression the
    predicate is always False.
    """
    if not args.exclude_files:
        # no exclude filter configured: nothing is ever invalid
        def acceptAll(filename):
            return False
        return acceptAll

    # compile once; the predicate is called for every database entry
    regex = re.compile(args.exclude_files)

    def f(filename):
        # re.match only anchors the expression at the start of the name
        return regex.match(filename) is not None
    return f
2661
def isHeaderFile(filename):
    """Return True if `filename` ends in a C/C++ header extension.

    The previous pattern ".*\\.h" was not anchored at the end, so any name
    merely containing ".h" (for example "my.hidden.cxx") was classified as
    a header. Checking the suffix avoids these false positives while still
    accepting the .hh/.hpp/.hxx variants the old pattern let through.
    """
    return filename.endswith(('.h', '.hh', '.hpp', '.hxx'))
3567
def isSourceFile(filename):
    """Return True if `filename` ends in the C++ source extension ".cxx".

    The previous pattern ".*\\.cxx" was not anchored at the end, so names
    such as "O2Device.cxx.orig" were classified as source files. Only the
    ".cxx" suffix is recognised, as before.
    """
    return filename.endswith('.cxx')
6373
6474# modifies a compilation command by appending -MM (and removing the -o flag)
6575# in order to retrieve the header dependencies
@@ -84,10 +94,7 @@ def modifyCompileCommand(command):
def matchesHeader(line, header):
    """Tell whether `line` contains a reference to a '.h' file.

    Returns True when the expression below matches from the start of
    `line`, i.e. when the line contains ".h" anywhere; False otherwise.
    """
    # NOTE(review): `header` is accepted but never consulted -- the check
    # only looks for a generic ".h" in `line`. Confirm with the callers
    # whether matching the specific header was intended.
    pattern = ".*\.h"
    found = re.match(pattern, line)
    if found is None:
        return False
    return True
9198
9299def queryListOfHeaders (command ):
93100 proc = subprocess .Popen (command , stdout = subprocess .PIPE , stderr = subprocess .PIPE )
@@ -99,7 +106,7 @@ def queryListOfHeaders(command):
99106
100107# service that processes one item in the queue
101108def processItem (keepalive , changedheaderlist , queue , outqueue ):
102- while len (keepalive )> 0 :
109+ while len (keepalive ) > 0 :
103110 try :
104111 # this operation is blocking for at most 0.5 seconds
105112 # sad hack to be able to check whether we want this thread to be kept alive
@@ -111,6 +118,7 @@ def processItem(keepalive, changedheaderlist, queue, outqueue):
111118 expression = ".*" + header
112119 matches = re .match (expression , include )
113120 if not matches == None :
121+ verboseLog ("Adding " + entry ['file' ] + " because of a dependence on a modified header" )
114122 outqueue .put (entry )
115123 continue
116124
@@ -128,9 +136,27 @@ def reportProgress(keepalive, queue, q2):
128136# THE MAIN FUNCTION
129137#
130138def main ():
139+ checkall = False
140+ listofchangedfiles = []
141+ args = parseArgs ()
142+ global verbosity
143+ verbosity = args .verbose
144+ if not args .use_files :
145+ verboseLog ("no changeset given ... putting all (modulo the exclude set)" )
146+ checkall = True
147+ else :
148+ listofchangedfiles = getListOfChangedFiles (args .use_files )
149+
150+ #setup the isInvalid (closure) function
151+ isInvalid = makeInvalidClosure (args )
131152
132153 #open the compilations database
133- file = open ('compile_commands.json' ).read ()
154+ try :
155+ file = open ('compile_commands.json' ).read ()
156+ except IOError :
157+ print "Problem opening the compilation database (file not found)"
158+ sys .exit (1 )
159+
134160 #convert json to dict
135161 data = json .loads (file );
136162
@@ -143,13 +169,18 @@ def main():
143169 elif isSourceFile (file ):
144170 changedsourcefilelist .append (file )
145171
146- # make a queue
172+ # make input/output queues for multithreaded processing
147173 inputqueue = Queue .Queue ()
148174 outputqueue = Queue .Queue ()
149175 keepAlive = ['alive' ]
150176
151177 # make some servicing threads
152178 max_task = multiprocessing .cpu_count ()
179+ if args .j :
180+ if args .j > 0 :
181+ max_task = max (max_task ,j )
182+
183+ verboseLog ("processing with " + str (max_task ) + " threads." )
153184 for _ in range (max_task ):
154185 t = threading .Thread (target = processItem , args = (keepAlive ,changedheaderlist ,inputqueue ,outputqueue ))
155186 t .deamon = True
@@ -162,38 +193,41 @@ def main():
162193
163194 outputdict = []
164195 #scan through compile database and filter against files
165- print "Processing " + str (len (data )) + " items "
196+ verboseLog ( "Processing " + str (len (data )) + " items." )
166197 for entry in data :
167198 filename = entry ['file' ]
168199 basename = os .path .basename (filename )
169200
170201 # check if invalid anyway
171202 if (isInvalid (basename )):
203+ verboseLog ("Excluding " + basename + " because of exclude filter" )
172204 continue
173205
174206 # check if this entry is part of the changed source file list
175207 # if yes, continue directly
176208 if checkall == True or (basename in changedsourcefilelist ):
209+ verboseLog ("Adding " + entry ['file' ] + " because of presence in modify list (or lack thereof)" )
177210 outputqueue .put (entry )
178211 continue
179-
180- # otherwise check if this source file is influenced by some changed header file
181- # TODO: if the header does not contain a template, it might be enough
182- # to only add one single source files that depends on it????
183- inputqueue .put (entry )
212+
213+ if len (changedheaderlist ) > 0 :
214+ # otherwise check if this source file is influenced by some changed header file
215+ # TODO: if the header does not contain a template, it might be enough
216+ # to only add one single source files that depends on it????
217+ inputqueue .put (entry )
184218
185219 # wait on the queue --> wait until queue is completely empty
186220 inputqueue .join ()
187221 # now we can shut down the deamon threads
188222 keepAlive [:]= []
189223
190224 #put outputqueue into outputdict
191- while outputqueue .qsize ()> 0 :
225+ while outputqueue .qsize () > 0 :
192226 outputdict .append (outputqueue .get (False ))
193227
194228 # write result dictionary to json
195229 outjson = json .dumps (outputdict , sort_keys = True , indent = 4 , separators = (',' , ': ' ))
196- with open ("thinned_compile_commands.json" ,'w' ) as fp :
230+ with open (args . o ,'w' ) as fp :
197231 fp .write (outjson )
198232
199233 return
0 commit comments