1 | #===- perf-helper.py - Clang Python Bindings -----------------*- python -*--===# |
2 | # |
3 | # Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | # See https://llvm.org/LICENSE.txt for license information. |
5 | # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | # |
7 | #===------------------------------------------------------------------------===# |
8 | |
9 | from __future__ import absolute_import, division, print_function |
10 | |
11 | import sys |
12 | import os |
13 | import subprocess |
14 | import argparse |
15 | import time |
16 | import bisect |
17 | import shlex |
18 | import tempfile |
19 | |
20 | test_env = { 'PATH' : os.environ['PATH'] } |
21 | |
22 | def findFilesWithExtension(path, extension): |
23 | filenames = [] |
24 | for root, dirs, files in os.walk(path): |
25 | for filename in files: |
26 | if filename.endswith(extension): |
27 | filenames.append(os.path.join(root, filename)) |
28 | return filenames |
29 | |
30 | def clean(args): |
31 | if len(args) != 2: |
32 | print('Usage: %s clean <path> <extension>\n' % __file__ + |
33 | '\tRemoves all files with extension from <path>.') |
34 | return 1 |
35 | for filename in findFilesWithExtension(args[0], args[1]): |
36 | os.remove(filename) |
37 | return 0 |
38 | |
39 | def merge(args): |
40 | if len(args) != 3: |
41 | print('Usage: %s clean <llvm-profdata> <output> <path>\n' % __file__ + |
42 | '\tMerges all profraw files from path into output.') |
43 | return 1 |
44 | cmd = [args[0], 'merge', '-o', args[1]] |
45 | cmd.extend(findFilesWithExtension(args[2], "profraw")) |
46 | subprocess.check_call(cmd) |
47 | return 0 |
48 | |
49 | def dtrace(args): |
50 | parser = argparse.ArgumentParser(prog='perf-helper dtrace', |
51 | description='dtrace wrapper for order file generation') |
52 | parser.add_argument('--buffer-size', metavar='size', type=int, required=False, |
53 | default=1, help='dtrace buffer size in MB (default 1)') |
54 | parser.add_argument('--use-oneshot', required=False, action='store_true', |
55 | help='Use dtrace\'s oneshot probes') |
56 | parser.add_argument('--use-ustack', required=False, action='store_true', |
57 | help='Use dtrace\'s ustack to print function names') |
58 | parser.add_argument('--cc1', required=False, action='store_true', |
59 | help='Execute cc1 directly (don\'t profile the driver)') |
60 | parser.add_argument('cmd', nargs='*', help='') |
61 | |
62 | # Use python's arg parser to handle all leading option arguments, but pass |
63 | # everything else through to dtrace |
64 | first_cmd = next(arg for arg in args if not arg.startswith("--")) |
65 | last_arg_idx = args.index(first_cmd) |
66 | |
67 | opts = parser.parse_args(args[:last_arg_idx]) |
68 | cmd = args[last_arg_idx:] |
69 | |
70 | if opts.cc1: |
71 | cmd = get_cc1_command_for_args(cmd, test_env) |
72 | |
73 | if opts.use_oneshot: |
74 | target = "oneshot$target:::entry" |
75 | else: |
76 | target = "pid$target:::entry" |
77 | predicate = '%s/probemod=="%s"/' % (target, os.path.basename(cmd[0])) |
78 | log_timestamp = 'printf("dtrace-TS: %d\\n", timestamp)' |
79 | if opts.use_ustack: |
80 | action = 'ustack(1);' |
81 | else: |
82 | action = 'printf("dtrace-Symbol: %s\\n", probefunc);' |
83 | dtrace_script = "%s { %s; %s }" % (predicate, log_timestamp, action) |
84 | |
85 | dtrace_args = [] |
86 | if not os.geteuid() == 0: |
87 | print( |
88 | 'Script must be run as root, or you must add the following to your sudoers:' |
89 | + '%%admin ALL=(ALL) NOPASSWD: /usr/sbin/dtrace') |
90 | dtrace_args.append("sudo") |
91 | |
92 | dtrace_args.extend(( |
93 | 'dtrace', '-xevaltime=exec', |
94 | '-xbufsize=%dm' % (opts.buffer_size), |
95 | '-q', '-n', dtrace_script, |
96 | '-c', ' '.join(cmd))) |
97 | |
98 | if sys.platform == "darwin": |
99 | dtrace_args.append('-xmangled') |
100 | |
101 | start_time = time.time() |
102 | |
103 | with open("%d.dtrace" % os.getpid(), "w") as f: |
104 | f.write("### Command: %s" % dtrace_args) |
105 | subprocess.check_call(dtrace_args, stdout=f, stderr=subprocess.PIPE) |
106 | |
107 | elapsed = time.time() - start_time |
108 | print("... data collection took %.4fs" % elapsed) |
109 | |
110 | return 0 |
111 | |
112 | def get_cc1_command_for_args(cmd, env): |
113 | # Find the cc1 command used by the compiler. To do this we execute the |
114 | # compiler with '-###' to figure out what it wants to do. |
115 | cmd = cmd + ['-###'] |
116 | cc_output = subprocess.check_output(cmd, stderr=subprocess.STDOUT, env=env, universal_newlines=True).strip() |
117 | cc_commands = [] |
118 | for ln in cc_output.split('\n'): |
119 | # Filter out known garbage. |
120 | if (ln == 'Using built-in specs.' or |
121 | ln.startswith('Configured with:') or |
122 | ln.startswith('Target:') or |
123 | ln.startswith('Thread model:') or |
124 | ln.startswith('InstalledDir:') or |
125 | ln.startswith('LLVM Profile Note') or |
126 | ' version ' in ln): |
127 | continue |
128 | cc_commands.append(ln) |
129 | |
130 | if len(cc_commands) != 1: |
131 | print('Fatal error: unable to determine cc1 command: %r' % cc_output) |
132 | exit(1) |
133 | |
134 | cc1_cmd = shlex.split(cc_commands[0]) |
135 | if not cc1_cmd: |
136 | print('Fatal error: unable to determine cc1 command: %r' % cc_output) |
137 | exit(1) |
138 | |
139 | return cc1_cmd |
140 | |
141 | def cc1(args): |
142 | parser = argparse.ArgumentParser(prog='perf-helper cc1', |
143 | description='cc1 wrapper for order file generation') |
144 | parser.add_argument('cmd', nargs='*', help='') |
145 | |
146 | # Use python's arg parser to handle all leading option arguments, but pass |
147 | # everything else through to dtrace |
148 | first_cmd = next(arg for arg in args if not arg.startswith("--")) |
149 | last_arg_idx = args.index(first_cmd) |
150 | |
151 | opts = parser.parse_args(args[:last_arg_idx]) |
152 | cmd = args[last_arg_idx:] |
153 | |
154 | # clear the profile file env, so that we don't generate profdata |
155 | # when capturing the cc1 command |
156 | cc1_env = test_env |
157 | cc1_env["LLVM_PROFILE_FILE"] = os.devnull |
158 | cc1_cmd = get_cc1_command_for_args(cmd, cc1_env) |
159 | |
160 | subprocess.check_call(cc1_cmd) |
161 | return 0 |
162 | |
163 | def parse_dtrace_symbol_file(path, all_symbols, all_symbols_set, |
164 | missing_symbols, opts): |
165 | def fix_mangling(symbol): |
166 | if sys.platform == "darwin": |
167 | if symbol[0] != '_' and symbol != 'start': |
168 | symbol = '_' + symbol |
169 | return symbol |
170 | |
171 | def get_symbols_with_prefix(symbol): |
172 | start_index = bisect.bisect_left(all_symbols, symbol) |
173 | for s in all_symbols[start_index:]: |
174 | if not s.startswith(symbol): |
175 | break |
176 | yield s |
177 | |
178 | # Extract the list of symbols from the given file, which is assumed to be |
179 | # the output of a dtrace run logging either probefunc or ustack(1) and |
180 | # nothing else. The dtrace -xdemangle option needs to be used. |
181 | # |
182 | # This is particular to OS X at the moment, because of the '_' handling. |
183 | with open(path) as f: |
184 | current_timestamp = None |
185 | for ln in f: |
186 | # Drop leading and trailing whitespace. |
187 | ln = ln.strip() |
188 | if not ln.startswith("dtrace-"): |
189 | continue |
190 | |
191 | # If this is a timestamp specifier, extract it. |
192 | if ln.startswith("dtrace-TS: "): |
193 | _,data = ln.split(': ', 1) |
194 | if not data.isdigit(): |
195 | print("warning: unrecognized timestamp line %r, ignoring" % ln, |
196 | file=sys.stderr) |
197 | continue |
198 | current_timestamp = int(data) |
199 | continue |
200 | elif ln.startswith("dtrace-Symbol: "): |
201 | |
202 | _,ln = ln.split(': ', 1) |
203 | if not ln: |
204 | continue |
205 | |
206 | # If there is a '`' in the line, assume it is a ustack(1) entry in |
207 | # the form of <modulename>`<modulefunc>, where <modulefunc> is never |
208 | # truncated (but does need the mangling patched). |
209 | if '`' in ln: |
210 | yield (current_timestamp, fix_mangling(ln.split('`',1)[1])) |
211 | continue |
212 | |
213 | # Otherwise, assume this is a probefunc printout. DTrace on OS X |
214 | # seems to have a bug where it prints the mangled version of symbols |
215 | # which aren't C++ mangled. We just add a '_' to anything but start |
216 | # which doesn't already have a '_'. |
217 | symbol = fix_mangling(ln) |
218 | |
219 | # If we don't know all the symbols, or the symbol is one of them, |
220 | # just return it. |
221 | if not all_symbols_set or symbol in all_symbols_set: |
222 | yield (current_timestamp, symbol) |
223 | continue |
224 | |
225 | # Otherwise, we have a symbol name which isn't present in the |
226 | # binary. We assume it is truncated, and try to extend it. |
227 | |
228 | # Get all the symbols with this prefix. |
229 | possible_symbols = list(get_symbols_with_prefix(symbol)) |
230 | if not possible_symbols: |
231 | continue |
232 | |
233 | # If we found too many possible symbols, ignore this as a prefix. |
234 | if len(possible_symbols) > 100: |
235 | print( "warning: ignoring symbol %r " % symbol + |
236 | "(no match and too many possible suffixes)", file=sys.stderr) |
237 | continue |
238 | |
239 | # Report that we resolved a missing symbol. |
240 | if opts.show_missing_symbols and symbol not in missing_symbols: |
241 | print("warning: resolved missing symbol %r" % symbol, file=sys.stderr) |
242 | missing_symbols.add(symbol) |
243 | |
244 | # Otherwise, treat all the possible matches as having occurred. This |
245 | # is an over-approximation, but it should be ok in practice. |
246 | for s in possible_symbols: |
247 | yield (current_timestamp, s) |
248 | |
249 | def uniq(list): |
250 | seen = set() |
251 | for item in list: |
252 | if item not in seen: |
253 | yield item |
254 | seen.add(item) |
255 | |
256 | def form_by_call_order(symbol_lists): |
257 | # Simply strategy, just return symbols in order of occurrence, even across |
258 | # multiple runs. |
259 | return uniq(s for symbols in symbol_lists for s in symbols) |
260 | |
261 | def form_by_call_order_fair(symbol_lists): |
262 | # More complicated strategy that tries to respect the call order across all |
263 | # of the test cases, instead of giving a huge preference to the first test |
264 | # case. |
265 | |
266 | # First, uniq all the lists. |
267 | uniq_lists = [list(uniq(symbols)) for symbols in symbol_lists] |
268 | |
269 | # Compute the successors for each list. |
270 | succs = {} |
271 | for symbols in uniq_lists: |
272 | for a,b in zip(symbols[:-1], symbols[1:]): |
273 | succs[a] = items = succs.get(a, []) |
274 | if b not in items: |
275 | items.append(b) |
276 | |
277 | # Emit all the symbols, but make sure to always emit all successors from any |
278 | # call list whenever we see a symbol. |
279 | # |
280 | # There isn't much science here, but this sometimes works better than the |
281 | # more naive strategy. Then again, sometimes it doesn't so more research is |
282 | # probably needed. |
283 | return uniq(s |
284 | for symbols in symbol_lists |
285 | for node in symbols |
286 | for s in ([node] + succs.get(node,[]))) |
287 | |
288 | def form_by_frequency(symbol_lists): |
289 | # Form the order file by just putting the most commonly occurring symbols |
290 | # first. This assumes the data files didn't use the oneshot dtrace method. |
291 | |
292 | counts = {} |
293 | for symbols in symbol_lists: |
294 | for a in symbols: |
295 | counts[a] = counts.get(a,0) + 1 |
296 | |
297 | by_count = list(counts.items()) |
298 | by_count.sort(key = lambda __n: -__n[1]) |
299 | return [s for s,n in by_count] |
300 | |
301 | def form_by_random(symbol_lists): |
302 | # Randomize the symbols. |
303 | merged_symbols = uniq(s for symbols in symbol_lists |
304 | for s in symbols) |
305 | random.shuffle(merged_symbols) |
306 | return merged_symbols |
307 | |
308 | def form_by_alphabetical(symbol_lists): |
309 | # Alphabetize the symbols. |
310 | merged_symbols = list(set(s for symbols in symbol_lists for s in symbols)) |
311 | merged_symbols.sort() |
312 | return merged_symbols |
313 | |
314 | methods = dict((name[len("form_by_"):],value) |
315 | for name,value in locals().items() if name.startswith("form_by_")) |
316 | |
317 | def genOrderFile(args): |
318 | parser = argparse.ArgumentParser( |
319 | "%prog [options] <dtrace data file directories>]") |
320 | parser.add_argument('input', nargs='+', help='') |
321 | parser.add_argument("--binary", metavar="PATH", type=str, dest="binary_path", |
322 | help="Path to the binary being ordered (for getting all symbols)", |
323 | default=None) |
324 | parser.add_argument("--output", dest="output_path", |
325 | help="path to output order file to write", default=None, required=True, |
326 | metavar="PATH") |
327 | parser.add_argument("--show-missing-symbols", dest="show_missing_symbols", |
328 | help="show symbols which are 'fixed up' to a valid name (requires --binary)", |
329 | action="store_true", default=None) |
330 | parser.add_argument("--output-unordered-symbols", |
331 | dest="output_unordered_symbols_path", |
332 | help="write a list of the unordered symbols to PATH (requires --binary)", |
333 | default=None, metavar="PATH") |
334 | parser.add_argument("--method", dest="method", |
335 | help="order file generation method to use", choices=list(methods.keys()), |
336 | default='call_order') |
337 | opts = parser.parse_args(args) |
338 | |
339 | # If the user gave us a binary, get all the symbols in the binary by |
340 | # snarfing 'nm' output. |
341 | if opts.binary_path is not None: |
342 | output = subprocess.check_output(['nm', '-P', opts.binary_path], universal_newlines=True) |
343 | lines = output.split("\n") |
344 | all_symbols = [ln.split(' ',1)[0] |
345 | for ln in lines |
346 | if ln.strip()] |
347 | print("found %d symbols in binary" % len(all_symbols)) |
348 | all_symbols.sort() |
349 | else: |
350 | all_symbols = [] |
351 | all_symbols_set = set(all_symbols) |
352 | |
353 | # Compute the list of input files. |
354 | input_files = [] |
355 | for dirname in opts.input: |
356 | input_files.extend(findFilesWithExtension(dirname, "dtrace")) |
357 | |
358 | # Load all of the input files. |
359 | print("loading from %d data files" % len(input_files)) |
360 | missing_symbols = set() |
361 | timestamped_symbol_lists = [ |
362 | list(parse_dtrace_symbol_file(path, all_symbols, all_symbols_set, |
363 | missing_symbols, opts)) |
364 | for path in input_files] |
365 | |
366 | # Reorder each symbol list. |
367 | symbol_lists = [] |
368 | for timestamped_symbols_list in timestamped_symbol_lists: |
369 | timestamped_symbols_list.sort() |
370 | symbol_lists.append([symbol for _,symbol in timestamped_symbols_list]) |
371 | |
372 | # Execute the desire order file generation method. |
373 | method = methods.get(opts.method) |
374 | result = list(method(symbol_lists)) |
375 | |
376 | # Report to the user on what percentage of symbols are present in the order |
377 | # file. |
378 | num_ordered_symbols = len(result) |
379 | if all_symbols: |
380 | print("note: order file contains %d/%d symbols (%.2f%%)" % ( |
381 | num_ordered_symbols, len(all_symbols), |
382 | 100.*num_ordered_symbols/len(all_symbols)), file=sys.stderr) |
383 | |
384 | if opts.output_unordered_symbols_path: |
385 | ordered_symbols_set = set(result) |
386 | with open(opts.output_unordered_symbols_path, 'w') as f: |
387 | f.write("\n".join(s for s in all_symbols if s not in ordered_symbols_set)) |
388 | |
389 | # Write the order file. |
390 | with open(opts.output_path, 'w') as f: |
391 | f.write("\n".join(result)) |
392 | f.write("\n") |
393 | |
394 | return 0 |
395 | |
396 | commands = {'clean' : clean, |
397 | 'merge' : merge, |
398 | 'dtrace' : dtrace, |
399 | 'cc1' : cc1, |
400 | 'gen-order-file' : genOrderFile} |
401 | |
402 | def main(): |
403 | f = commands[sys.argv[1]] |
404 | sys.exit(f(sys.argv[2:])) |
405 | |
406 | if __name__ == '__main__': |
407 | main() |
408 | |