Commit 084efcd

A python tool to thin/filter compilation databases
Initial version of a python tool which can produce a thinned-out compilation database, given a list of (changed) source files and/or a list of (changed) header files. The primary goal of this tool is to accelerate analysis tasks (static code analysis) based on a compilation database by focusing only on files that might have changed (for instance in a pull request).
1 parent d78747b · commit 084efcd
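
For reference, a compilation database (compile_commands.json, as written by CMake when CMAKE_EXPORT_COMPILE_COMMANDS is enabled) is a JSON array with one entry per translation unit; the script added below only reads the "file" and "command" fields of each entry. A minimal illustrative entry (directory, flags and paths are made up):

    {
        "directory": "/home/user/O2/build",
        "command": "/usr/bin/c++ -I/home/user/O2/include -O2 -std=c++14 -o CMakeFiles/Base.dir/O2Device.cxx.o -c /home/user/O2/Devices/O2Device.cxx",
        "file": "/home/user/O2/Devices/O2Device.cxx"
    }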

3 files changed: 208 additions & 0 deletions

CMakeLists.txt

Lines changed: 4 additions & 0 deletions

@@ -52,4 +52,8 @@ add_subdirectory(plugin)
 # for testing
 add_subdirectory(test)

+# some extra utilities
+add_subdirectory(utility)
+
+
 endif()

utility/CMakeLists.txt

Lines changed: 2 additions & 0 deletions

@@ -0,0 +1,2 @@

install(PROGRAMS ThinCompilationsDatabase.py DESTINATION bin)

utility/ThinCompilationsDatabase.py

Lines changed: 202 additions & 0 deletions

@@ -0,0 +1,202 @@
#!/usr/bin/python

# A python script with the goal
# to transform a (large) compilation database
# to a smaller compilation database, given a list of source files
# that have changed (currently only supporting c++ source and header files)
# This should be useful to reduce the time spent in static code analysis by only
# checking files that have changed, or are influenced by a change
#
# First version: Sandro Wenzel (June 2017)

import json
import os
import subprocess
import re
# these are for queue synchronized multi-threading
import Queue
import threading
import multiprocessing
import time
import sys

# this is a list of changed files we get from git (or the pull request)
listofchangedfiles=['O2Device.cxx', 'CalDet.h', 'CalDet.cxx']
checkall=False

def isHeaderFile(filename):
    # make this more general
    expression=".*\.h"
    # make this more efficient by compiling the expression
    result=re.match(expression, filename)
    if not result == None:
        return True
    return False

def isSourceFile(filename):
    # make this more general
    expression=".*\.cxx"
    # make this more efficient by compiling the expression
    result=re.match(expression, filename)
    if not result == None:
        return True
    return False

def isROOTDictionaryFile(filename):
    expression=".*G\_\_.*\.cxx"
    # make this more efficient by compiling the expression
    result=re.match(expression, filename)
    if not result == None:
        return True
    return False

def isProtoBuffFile(filename):
    expression=".*\.pb\.cc"
    # make this more efficient by compiling the expression
    result=re.match(expression, filename)
    if not result == None:
        return True
    return False

def isInvalid(filename):
    return isROOTDictionaryFile(filename) or isProtoBuffFile(filename)
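
# For illustration, how the helpers above classify a few file names
# (the generated-file names here are hypothetical):
#   isSourceFile('O2Device.cxx')       -> True   (goes into the changed-source list)
#   isHeaderFile('CalDet.h')           -> True   (goes into the changed-header list)
#   isInvalid('G__SomethingDict.cxx')  -> True   (generated ROOT dictionary, skipped)
#   isInvalid('SomeMessage.pb.cc')     -> True   (generated protobuf source, skipped)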

# modifies a compilation command by appending -MM (and removing the -o flag)
# in order to retrieve the header dependencies
def modifyCompileCommand(command):
    newcommand=[]
    outputflagseen=False
    # take out the output command
    for token in command.split(' '):
        if len(token)==0:
            continue
        if token=='-o':
            outputflagseen=True
        else:
            if not outputflagseen:
                newcommand.append(token)
            else:
                # this token is the argument of -o; skip it as well
                outputflagseen=False
    # add the -MM flag which will generate the header dependency list for a source file
    newcommand.append('-MM')
    return newcommand
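
# For illustration, with a made-up database command such as
#   "c++ -I/src/include -O2 -o CMakeFiles/dev.dir/O2Device.cxx.o -c /src/Devices/O2Device.cxx"
# modifyCompileCommand returns the token list
#   ['c++', '-I/src/include', '-O2', '-c', '/src/Devices/O2Device.cxx', '-MM']
# i.e. the '-o <object>' pair is removed and '-MM' is appended, so running the result
# prints the make-style header dependency rule instead of producing an object file.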

def matchesHeader(line, header):
    # note: the second argument is currently unused; any line containing a '.h' path matches
    expression=".*\.h"
    # make this more efficient by compiling the expression
    result=re.match(expression, line)
    if not result == None:
        return True
    return False

def queryListOfHeaders(command):
    proc=subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    headerlist=[]
    for line in proc.stdout:
        if matchesHeader(line.strip(), headerlist):
            headerlist.append(line.strip())
    return headerlist
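
# For illustration (paths are made up), the -MM output parsed above looks roughly like
#   CalDet.o: /src/TPC/CalDet.cxx \
#    /src/include/TPC/CalDet.h /src/include/TPC/CalArray.h \
#    /src/include/TPC/CalROC.h
# every stripped line that contains a '.h' path is collected, so headerlist ends up
# holding the continuation lines naming the included headers (possibly several per line).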

# service that processes one item in the queue
def processItem(keepalive, changedheaderlist, queue, outqueue):
    while len(keepalive)>0:
        try:
            # this operation is blocking for at most 0.5 seconds
            # sad hack to be able to check whether we want this thread to be kept alive
            # unfortunately, the main process never exits otherwise
            entry = queue.get(True, 0.5)
            includedheaderlist=queryListOfHeaders(modifyCompileCommand(entry['command']))
            # check whether any changed header shows up in the include list;
            # put the entry into the output queue at most once
            matchfound=False
            for header in changedheaderlist:
                for include in includedheaderlist:
                    expression=".*"+header
                    matches = re.match(expression, include)
                    if not matches == None:
                        matchfound=True
                        break
                if matchfound:
                    break
            if matchfound:
                outqueue.put(entry)

            queue.task_done()
        except Queue.Empty:
            pass

def reportProgress(keepalive, queue, q2):
    while len(keepalive)>0:
        print "input queue has size " + str(queue.qsize())
        print "output queue has size " + str(q2.qsize())
        time.sleep(1)

#
# THE MAIN FUNCTION
#
def main():

    #open the compilation database
    file=open('compile_commands.json').read()
    #convert json to dict
    data=json.loads(file)

    #make 2 lists, changedheaders and changedsourcefiles
    changedheaderlist=[]
    changedsourcefilelist=[]
    for file in listofchangedfiles:
        if isHeaderFile(file):
            changedheaderlist.append(file)
        elif isSourceFile(file):
            changedsourcefilelist.append(file)

    # make a queue
    inputqueue = Queue.Queue()
    outputqueue = Queue.Queue()
    keepAlive=['alive']

    # make some servicing threads
    max_task = multiprocessing.cpu_count()
    for _ in range(max_task):
        t = threading.Thread(target=processItem, args=(keepAlive,changedheaderlist,inputqueue,outputqueue))
        t.daemon=True
        t.start()

    # launch the reporting thread
    rt = threading.Thread(target=reportProgress, args=(keepAlive,inputqueue,outputqueue))
    rt.daemon=True
    rt.start()

    outputdict=[]
    #scan through compile database and filter against files
    print "Processing " + str(len(data)) + " items "
    for entry in data:
        filename=entry['file']
        basename=os.path.basename(filename)

        # skip generated files (ROOT dictionaries, protobuf sources)
        if isInvalid(basename):
            continue

        # check if this entry is part of the changed source file list
        # if yes, take it directly
        if checkall==True or (basename in changedsourcefilelist):
            outputqueue.put(entry)
            continue

        # otherwise check if this source file is influenced by some changed header file
        # TODO: if the header does not contain a template, it might be enough
        # to only add one single source file that depends on it?
        inputqueue.put(entry)

    # wait on the queue --> wait until queue is completely empty
    inputqueue.join()
    # now we can shut down the daemon threads
    keepAlive[:]=[]

    #put outputqueue into outputdict
    while outputqueue.qsize()>0:
        outputdict.append(outputqueue.get(False))

    # write result dictionary to json
    outjson = json.dumps(outputdict, sort_keys=True, indent=4, separators=(',', ': '))
    with open("thinned_compile_commands.json",'w') as fp:
        fp.write(outjson)

    return

if __name__ == '__main__':
    main()
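
One possible way to consume the result (illustrative only; the commit does not prescribe a particular analyzer): run the script from the build directory, where compile_commands.json lives and thinned_compile_commands.json gets written, then expose the thinned file under the standard name and analyze only the surviving entries, e.g. with clang-tidy:

    # hedged usage sketch; the analyzer choice, paths and directory layout are assumptions
    import json
    import os
    import shutil
    import subprocess

    builddir = os.getcwd()   # run in the build directory containing compile_commands.json
    subprocess.check_call(['python', 'ThinCompilationsDatabase.py'])

    # clang-tidy expects the database to be named compile_commands.json,
    # so stage the thinned file in a scratch directory under that name
    scratch = os.path.join(builddir, 'thinned')
    if not os.path.isdir(scratch):
        os.mkdir(scratch)
    shutil.copy('thinned_compile_commands.json',
                os.path.join(scratch, 'compile_commands.json'))

    # analyze only the translation units that survived the thinning
    for entry in json.load(open(os.path.join(scratch, 'compile_commands.json'))):
        subprocess.call(['clang-tidy', '-p', scratch, entry['file']])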
