1 | #!/usr/bin/env python |
2 | |
3 | from __future__ import absolute_import, division, print_function |
4 | import os |
5 | import re |
6 | import subprocess |
7 | import sys |
8 | import tempfile |
9 | |
10 | ### |
11 | |
12 | class DeltaAlgorithm(object): |
13 | def __init__(self): |
14 | self.cache = set() |
15 | |
16 | def test(self, changes): |
17 | abstract |
18 | |
19 | ### |
20 | |
21 | def getTestResult(self, changes): |
22 | # There is no reason to cache successful tests because we will |
23 | # always reduce the changeset when we see one. |
24 | |
25 | changeset = frozenset(changes) |
26 | if changeset in self.cache: |
27 | return False |
28 | elif not self.test(changes): |
29 | self.cache.add(changeset) |
30 | return False |
31 | else: |
32 | return True |
33 | |
34 | def run(self, changes, force=False): |
35 | # Make sure the initial test passes, if not then (a) either |
36 | # the user doesn't expect monotonicity, and we may end up |
37 | # doing O(N^2) tests, or (b) the test is wrong. Avoid the |
38 | # O(N^2) case unless user requests it. |
39 | if not force: |
40 | if not self.getTestResult(changes): |
41 | raise ValueError('Initial test passed to delta fails.') |
42 | |
43 | # Check empty set first to quickly find poor test functions. |
44 | if self.getTestResult(set()): |
45 | return set() |
46 | else: |
47 | return self.delta(changes, self.split(changes)) |
48 | |
49 | def split(self, S): |
50 | """split(set) -> [sets] |
51 | |
52 | Partition a set into one or two pieces. |
53 | """ |
54 | |
55 | # There are many ways to split, we could do a better job with more |
56 | # context information (but then the API becomes grosser). |
57 | L = list(S) |
58 | mid = len(L)//2 |
59 | if mid==0: |
60 | return L, |
61 | else: |
62 | return L[:mid],L[mid:] |
63 | |
64 | def delta(self, c, sets): |
65 | # assert(reduce(set.union, sets, set()) == c) |
66 | |
67 | # If there is nothing left we can remove, we are done. |
68 | if len(sets) <= 1: |
69 | return c |
70 | |
71 | # Look for a passing subset. |
72 | res = self.search(c, sets) |
73 | if res is not None: |
74 | return res |
75 | |
76 | # Otherwise, partition sets if possible; if not we are done. |
77 | refined = sum(map(list, map(self.split, sets)), []) |
78 | if len(refined) == len(sets): |
79 | return c |
80 | |
81 | return self.delta(c, refined) |
82 | |
83 | def search(self, c, sets): |
84 | for i,S in enumerate(sets): |
85 | # If test passes on this subset alone, recurse. |
86 | if self.getTestResult(S): |
87 | return self.delta(S, self.split(S)) |
88 | |
89 | # Otherwise if we have more than two sets, see if test |
90 | # pases without this subset. |
91 | if len(sets) > 2: |
92 | complement = sum(sets[:i] + sets[i+1:],[]) |
93 | if self.getTestResult(complement): |
94 | return self.delta(complement, sets[:i] + sets[i+1:]) |
95 | |
96 | ### |
97 | |
98 | class Token(object): |
99 | def __init__(self, type, data, flags, file, line, column): |
100 | self.type = type |
101 | self.data = data |
102 | self.flags = flags |
103 | self.file = file |
104 | self.line = line |
105 | self.column = column |
106 | |
107 | kTokenRE = re.compile(r"""([a-z_]+) '(.*)'\t(.*)\tLoc=<(.*):(.*):(.*)>""", |
108 | re.DOTALL | re.MULTILINE) |
109 | |
110 | def getTokens(path): |
111 | p = subprocess.Popen(['clang','-dump-raw-tokens',path], |
112 | stdin=subprocess.PIPE, |
113 | stdout=subprocess.PIPE, |
114 | stderr=subprocess.PIPE) |
115 | out,err = p.communicate() |
116 | |
117 | tokens = [] |
118 | collect = None |
119 | for ln in err.split('\n'): |
120 | # Silly programmers refuse to print in simple machine readable |
121 | # formats. Whatever. |
122 | if collect is None: |
123 | collect = ln |
124 | else: |
125 | collect = collect + '\n' + ln |
126 | if 'Loc=<' in ln and ln.endswith('>'): |
127 | ln,collect = collect,None |
128 | tokens.append(Token(*kTokenRE.match(ln).groups())) |
129 | |
130 | return tokens |
131 | |
132 | ### |
133 | |
134 | class TMBDDelta(DeltaAlgorithm): |
135 | def __init__(self, testProgram, tokenLists, log): |
136 | def patchName(name, suffix): |
137 | base,ext = os.path.splitext(name) |
138 | return base + '.' + suffix + ext |
139 | super(TMBDDelta, self).__init__() |
140 | self.testProgram = testProgram |
141 | self.tokenLists = tokenLists |
142 | self.tempFiles = [patchName(f,'tmp') |
143 | for f,_ in self.tokenLists] |
144 | self.targetFiles = [patchName(f,'ok') |
145 | for f,_ in self.tokenLists] |
146 | self.log = log |
147 | self.numTests = 0 |
148 | |
149 | def writeFiles(self, changes, fileNames): |
150 | assert len(fileNames) == len(self.tokenLists) |
151 | byFile = [[] for i in self.tokenLists] |
152 | for i,j in changes: |
153 | byFile[i].append(j) |
154 | |
155 | for i,(file,tokens) in enumerate(self.tokenLists): |
156 | f = open(fileNames[i],'w') |
157 | for j in byFile[i]: |
158 | f.write(tokens[j]) |
159 | f.close() |
160 | |
161 | return byFile |
162 | |
163 | def test(self, changes): |
164 | self.numTests += 1 |
165 | |
166 | byFile = self.writeFiles(changes, self.tempFiles) |
167 | |
168 | if self.log: |
169 | print('TEST - ', end=' ', file=sys.stderr) |
170 | if self.log > 1: |
171 | for i,(file,_) in enumerate(self.tokenLists): |
172 | indices = byFile[i] |
173 | if i: |
174 | sys.stderr.write('\n ') |
175 | sys.stderr.write('%s:%d tokens: [' % (file,len(byFile[i]))) |
176 | prev = None |
177 | for j in byFile[i]: |
178 | if prev is None or j != prev + 1: |
179 | if prev: |
180 | sys.stderr.write('%d][' % prev) |
181 | sys.stderr.write(str(j)) |
182 | sys.stderr.write(':') |
183 | prev = j |
184 | if byFile[i]: |
185 | sys.stderr.write(str(byFile[i][-1])) |
186 | sys.stderr.write('] ') |
187 | else: |
188 | print(', '.join(['%s:%d tokens' % (file, len(byFile[i])) |
189 | for i,(file,_) in enumerate(self.tokenLists)]), end=' ', file=sys.stderr) |
190 | |
191 | p = subprocess.Popen([self.testProgram] + self.tempFiles) |
192 | res = p.wait() == 0 |
193 | |
194 | if res: |
195 | self.writeFiles(changes, self.targetFiles) |
196 | |
197 | if self.log: |
198 | print('=> %s' % res, file=sys.stderr) |
199 | else: |
200 | if res: |
201 | print('\nSUCCESS (%d tokens)' % len(changes)) |
202 | else: |
203 | sys.stderr.write('.') |
204 | |
205 | return res |
206 | |
207 | def run(self): |
208 | res = super(TMBDDelta, self).run([(i,j) |
209 | for i,(file,tokens) in enumerate(self.tokenLists) |
210 | for j in range(len(tokens))]) |
211 | self.writeFiles(res, self.targetFiles) |
212 | if not self.log: |
213 | print(file=sys.stderr) |
214 | return res |
215 | |
216 | def tokenBasedMultiDelta(program, files, log): |
217 | # Read in the lists of tokens. |
218 | tokenLists = [(file, [t.data for t in getTokens(file)]) |
219 | for file in files] |
220 | |
221 | numTokens = sum([len(tokens) for _,tokens in tokenLists]) |
222 | print("Delta on %s with %d tokens." % (', '.join(files), numTokens)) |
223 | |
224 | tbmd = TMBDDelta(program, tokenLists, log) |
225 | |
226 | res = tbmd.run() |
227 | |
228 | print("Finished %s with %d tokens (in %d tests)." % (', '.join(tbmd.targetFiles), |
229 | len(res), |
230 | tbmd.numTests)) |
231 | |
232 | def main(): |
233 | from optparse import OptionParser, OptionGroup |
234 | parser = OptionParser("%prog <test program> {files+}") |
235 | parser.add_option("", "--debug", dest="debugLevel", |
236 | help="set debug level [default %default]", |
237 | action="store", type=int, default=0) |
238 | (opts, args) = parser.parse_args() |
239 | |
240 | if len(args) <= 1: |
241 | parser.error('Invalid number of arguments.') |
242 | |
243 | program,files = args[0],args[1:] |
244 | |
245 | md = tokenBasedMultiDelta(program, files, log=opts.debugLevel) |
246 | |
247 | if __name__ == '__main__': |
248 | try: |
249 | main() |
250 | except KeyboardInterrupt: |
251 | print('Interrupted.', file=sys.stderr) |
252 | os._exit(1) # Avoid freeing our giant cache. |
253 | |