1 | #!/usr/bin/env python |
2 | # |
3 | #===- git-clang-format - ClangFormat Git Integration ---------*- python -*--===# |
4 | # |
5 | # Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
6 | # See https://llvm.org/LICENSE.txt for license information. |
7 | # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
8 | # |
9 | #===------------------------------------------------------------------------===# |
10 | |
11 | r""" |
12 | clang-format git integration |
13 | ============================ |
14 | |
15 | This file provides a clang-format integration for git. Put it somewhere in your |
16 | path and ensure that it is executable. Then, "git clang-format" will invoke |
17 | clang-format on the changes in current files or a specific commit. |
18 | |
19 | For further details, run: |
20 | git clang-format -h |
21 | |
22 | Requires Python 2.7 or Python 3 |
23 | """ |
24 | |
25 | from __future__ import absolute_import, division, print_function |
26 | import argparse |
27 | import collections |
28 | import contextlib |
29 | import errno |
30 | import os |
31 | import re |
32 | import subprocess |
33 | import sys |
34 | |
# One-line synopsis shown by argparse in the usage/help output.
usage = 'git clang-format [OPTIONS] [<commit>] [<commit>] [--] [<file>...]'

# Long description shown by `git clang-format -h`. The git-config names listed
# here must match the keys looked up in main() (git lower-cases section and
# variable names, so `clangFormat.extensions` is read as
# `clangformat.extensions`).
desc = '''
If zero or one commits are given, run clang-format on all lines that differ
between the working directory and <commit>, which defaults to HEAD. Changes are
only applied to the working directory.

If two commits are given (requires --diff), run clang-format on all lines in the
second <commit> that differ from the first <commit>.

The following git-config settings set the default of the corresponding option:
  clangFormat.binary
  clangFormat.commit
  clangFormat.extensions
  clangFormat.style
'''

# Name of the temporary index file in which to save the output of clang-format.
# This file is created within the .git directory.
temp_index_basename = 'clang-format-index'


# A run of `count` consecutive lines beginning at (1-based) line `start`.
Range = collections.namedtuple('Range', 'start, count')
58 | |
59 | |
def main():
  """Entry point: run clang-format on the lines selected by the command line.

  Reads option defaults from the git configuration, parses the command line,
  computes the changed lines for the selected commit(s), drops files whose
  extension is not allowed, runs clang-format on what remains and then either
  prints the resulting diff (--diff) or applies it to the working directory.

  Exits through die() on invalid commit arguments."""
  config = load_git_config()

  # In order to keep '--' yet allow options after positionals, we need to
  # check for '--' ourselves. (Setting nargs='*' throws away the '--', while
  # nargs=argparse.REMAINDER disallows options after positionals.)
  argv = sys.argv[1:]
  try:
    idx = argv.index('--')
  except ValueError:
    dash_dash = []
  else:
    dash_dash = argv[idx:]
    argv = argv[:idx]

  default_extensions = ','.join([
      # From clang/lib/Frontend/FrontendOptions.cpp, all lower case
      'c', 'h',  # C
      'm',  # ObjC
      'mm',  # ObjC++
      'cc', 'cp', 'cpp', 'c++', 'cxx', 'hpp',  # C++
      'cu',  # CUDA
      # Other languages that clang-format supports
      'proto', 'protodevel',  # Protocol Buffers
      'java',  # Java
      'js',  # JavaScript
      'ts',  # TypeScript
      ])

  p = argparse.ArgumentParser(
      usage=usage, formatter_class=argparse.RawDescriptionHelpFormatter,
      description=desc)
  p.add_argument('--binary',
                 default=config.get('clangformat.binary', 'clang-format'),
                 help='path to clang-format')
  p.add_argument('--commit',
                 default=config.get('clangformat.commit', 'HEAD'),
                 help='default commit to use if none is specified')
  p.add_argument('--diff', action='store_true',
                 help='print a diff instead of applying the changes')
  p.add_argument('--extensions',
                 default=config.get('clangformat.extensions',
                                    default_extensions),
                 help=('comma-separated list of file extensions to format, '
                       'excluding the period and case-insensitive'))
  p.add_argument('-f', '--force', action='store_true',
                 help='allow changes to unstaged files')
  p.add_argument('-p', '--patch', action='store_true',
                 help='select hunks interactively')
  p.add_argument('-q', '--quiet', action='count', default=0,
                 help='print less information')
  p.add_argument('--style',
                 default=config.get('clangformat.style', None),
                 help='passed to clang-format')
  p.add_argument('-v', '--verbose', action='count', default=0,
                 help='print extra information')
  # We gather all the remaining positional arguments into 'args' since we need
  # to use some heuristics to determine whether or not <commit> was present.
  # However, to print pretty messages, we make use of metavar and help.
  p.add_argument('args', nargs='*', metavar='<commit>',
                 help='revision from which to compute the diff')
  p.add_argument('ignored', nargs='*', metavar='<file>...',
                 help='if specified, only consider differences in these files')
  opts = p.parse_args(argv)

  # Fold -q into -v so that a single signed verbosity level is used below.
  opts.verbose -= opts.quiet
  del opts.quiet

  commits, files = interpret_args(opts.args, dash_dash, opts.commit)
  # Check the upper bound first and unconditionally; nesting it under the
  # "one commit or fewer" branch would make it unreachable.
  if len(commits) > 2:
    die('at most two commits allowed; %d given' % len(commits))
  if len(commits) > 1 and not opts.diff:
    die('--diff is required when two commits are given')
  changed_lines = compute_diff_and_extract_lines(commits, files)
  if opts.verbose >= 1:
    # Remember every file in the diff so we can report the ones dropped by
    # the extension filter.
    ignored_files = set(changed_lines)
  filter_by_extension(changed_lines, opts.extensions.lower().split(','))
  if opts.verbose >= 1:
    ignored_files.difference_update(changed_lines)
    if ignored_files:
      print('Ignoring changes in the following files (wrong extension):')
      for filename in ignored_files:
        print(' %s' % filename)
    if changed_lines:
      print('Running clang-format on the following files:')
      for filename in changed_lines:
        print(' %s' % filename)
  if not changed_lines:
    print('no modified files to format')
    return
  # The paths taken from the computed diff are relative to the top level of
  # the repository, so we must cd there before accessing those files.
  cd_to_toplevel()
  if len(commits) > 1:
    old_tree = commits[1]
    new_tree = run_clang_format_and_save_to_tree(changed_lines,
                                                 revision=commits[1],
                                                 binary=opts.binary,
                                                 style=opts.style)
  else:
    old_tree = create_tree_from_workdir(changed_lines)
    new_tree = run_clang_format_and_save_to_tree(changed_lines,
                                                 binary=opts.binary,
                                                 style=opts.style)
  if opts.verbose >= 1:
    print('old tree: %s' % old_tree)
    print('new tree: %s' % new_tree)
  if old_tree == new_tree:
    if opts.verbose >= 0:
      print('clang-format did not modify any files')
  elif opts.diff:
    print_diff(old_tree, new_tree)
  else:
    changed_files = apply_changes(old_tree, new_tree, force=opts.force,
                                  patch_mode=opts.patch)
    if (opts.verbose >= 0 and not opts.patch) or opts.verbose >= 1:
      print('changed files:')
      for filename in changed_files:
        print(' %s' % filename)
181 | |
182 | |
def load_git_config(non_string_options=None):
  """Return the git configuration as a dictionary.

  All options are assumed to be strings unless listed in `non_string_options`,
  which is a dictionary mapping option name (in lower case) to either "--bool"
  or "--int"; the value of such an option is re-read with
  `git config <type-flag> <name>`.

  With `--list --null`, git separates a name from its value with a newline and
  terminates each entry with NUL. A variable written without any value has no
  newline in its entry; git treats such a variable as boolean true, so it is
  recorded here as the string 'true' instead of raising ValueError."""
  if non_string_options is None:
    non_string_options = {}
  out = {}
  for entry in run('git', 'config', '--list', '--null').split('\0'):
    if not entry:
      continue
    name, separator, value = entry.partition('\n')
    if not separator:
      # Valueless variable: shorthand for boolean true.
      value = 'true'
    if name in non_string_options:
      value = run('git', 'config', non_string_options[name], name)
    out[name] = value
  return out
199 | |
200 | |
def interpret_args(args, dash_dash, default_commit):
  """Split the positional arguments into a (commits, files) pair.

  `args` holds the positionals found before any "--"; `dash_dash` holds the
  "--" itself and everything after it (empty if "--" was not given).

  With a "--", every element of `args` must name a commit (or tag) and the
  files are whatever followed the "--". Without it, leading elements of
  `args` are consumed (removed from `args` in place) for as long as they
  resolve to revisions; the remainder of `args` is returned as the files.
  Whenever no commit is supplied, `[default_commit]` is used."""
  if dash_dash:
    commits = args if len(args) != 0 else [default_commit]
    for commit in commits:
      kind = get_object_type(commit)
      if kind in ('commit', 'tag'):
        continue
      if kind is None:
        die("'%s' is not a commit" % commit)
      else:
        die("'%s' is a %s, but a commit was expected" % (commit, kind))
    return commits, dash_dash[1:]

  if not args:
    return [default_commit], []

  commits = []
  while args and disambiguate_revision(args[0]):
    commits.append(args.pop(0))
  if not commits:
    commits = [default_commit]
  return commits, args
237 | |
238 | |
def disambiguate_revision(value):
  """Tell whether `value` names a revision (True) or a file (False).

  Exits the program if `value` is ambiguous or names a git object that is
  neither a commit nor a tag."""
  # Let git reject a `value` that is neither a revision nor a file: when
  # rev-parse fails, run() terminates the program.
  run('git', 'rev-parse', value, verbose=False)
  kind = get_object_type(value)
  if kind in ('commit', 'tag'):
    return True
  if kind is None:
    return False
  die('`%s` is a %s, but a commit or filename was expected' %
      (value, kind))
251 | |
252 | |
def get_object_type(value):
  """Return the type reported by `git cat-file -t` for `value`, as a string.

  Returns None when the command exits with a non-zero status, i.e. when
  `value` is not a valid git object."""
  proc = subprocess.Popen(['git', 'cat-file', '-t', value],
                          stdout=subprocess.PIPE, stderr=subprocess.PIPE)
  # stderr is captured only to keep git's error message off the terminal.
  out, _ = proc.communicate()
  if proc.returncode == 0:
    return convert_string(out.strip())
  return None
262 | |
263 | |
def compute_diff_and_extract_lines(commits, files):
  """Run compute_diff() and feed its output to extract_lines().

  Returns the dictionary produced by extract_lines(). Exits with status 2 if
  the diff process reports failure."""
  proc = compute_diff(commits, files)
  line_ranges = extract_lines(proc.stdout)
  proc.stdout.close()
  if proc.wait() != 0:
    # Assume error was already printed to stderr.
    sys.exit(2)
  return line_ranges
274 | |
275 | |
def compute_diff(commits, files):
  """Start git to produce the diff for `commits` and return the process.

  The returned subprocess object's `stdout` yields a patch: `git diff-index`
  against the single commit when one is given, or `git diff-tree` between the
  commits when more than one is given. The patch is restricted to `files`
  when that list is non-empty, and uses zero lines of context (-U0)."""
  git_tool = 'diff-tree' if len(commits) > 1 else 'diff-index'
  cmd = ['git', git_tool, '-p', '-U0']
  cmd += commits
  cmd.append('--')
  cmd += files
  proc = subprocess.Popen(cmd, stdin=subprocess.PIPE, stdout=subprocess.PIPE)
  # Nothing is ever sent to git, so close its input right away.
  proc.stdin.close()
  return proc
291 | |
292 | |
def extract_lines(patch_file):
  """Extract the changed lines in `patch_file`.

  `patch_file` is an iterable of patch lines (bytes or strings). The input
  must have been produced with ``-U0``, meaning unidiff format with zero lines
  of context.

  The return value is a dict mapping filename to a list of line `Range`s
  (start_line, line_count) describing the added/modified lines in the new
  version of each file. Hunks that add no lines (pure deletions) are omitted,
  as are hunks seen before any `+++` file header."""
  matches = {}
  filename = None
  for line in patch_file:
    line = convert_string(line)
    # "+++ b/<path>": the first path component (the "b" prefix) is dropped.
    match = re.search(r'^\+\+\+\ [^/]+/(.*)', line)
    if match:
      # git appends a TAB after a path that contains spaces; strip it along
      # with the line terminator so the name matches the file on disk.
      filename = match.group(1).rstrip('\r\n\t')
      continue
    # "@@ -<old> +<start>[,<count>] @@": count defaults to 1 when omitted.
    match = re.search(r'^@@ -[0-9,]+ \+(\d+)(,(\d+))?', line)
    if match and filename is not None:
      start_line = int(match.group(1))
      line_count = 1
      if match.group(3):
        line_count = int(match.group(3))
      if line_count > 0:
        matches.setdefault(filename, []).append(Range(start_line, line_count))
  return matches
317 | |
318 | |
def filter_by_extension(dictionary, allowed_extensions):
  """Delete every key in `dictionary` that doesn't have an allowed extension.

  `dictionary` is modified in place; its keys are '/'-separated paths.
  `allowed_extensions` must be a collection of lowercase file extensions,
  excluding the period. The empty string stands for "no extension".

  The extension is taken from the last path component only, so a dot in a
  directory name (e.g. 'dir.d/Makefile') is not mistaken for an extension."""
  allowed_extensions = frozenset(allowed_extensions)
  # Iterate over a snapshot of the keys because entries are deleted below.
  for filename in list(dictionary):
    basename = filename.rsplit('/', 1)[-1]
    base_ext = basename.rsplit('.', 1)
    extension = base_ext[1].lower() if len(base_ext) == 2 else ''
    if extension not in allowed_extensions:
      del dictionary[filename]
331 | |
332 | |
def cd_to_toplevel():
  """Make the top-level directory of the git repository the current one."""
  os.chdir(run('git', 'rev-parse', '--show-toplevel'))
337 | |
338 | |
def create_tree_from_workdir(filenames):
  """Build a git tree out of the working-directory copies of `filenames`.

  Returns the object ID (SHA-1) of the created tree."""
  tree_id = create_tree(filenames, '--stdin')
  return tree_id
344 | |
345 | |
def run_clang_format_and_save_to_tree(changed_lines, revision=None,
                                      binary='clang-format', style=None):
  """Format every file in `changed_lines` and store the results in a git tree.

  `changed_lines` maps filename to the line ranges to format. When `revision`
  is given, each file's mode is read from that revision with `git ls-tree`;
  otherwise it is taken from the file in the working directory.

  Returns the object ID (SHA-1) of the created tree."""
  def file_mode(filename):
    # Octal mode string for `filename`, in the form git expects.
    if revision:
      ls_tree_cmd = ['git', 'ls-tree',
                     '%s:%s' % (revision, os.path.dirname(filename)),
                     os.path.basename(filename)]
      ls_tree = subprocess.Popen(ls_tree_cmd, stdin=subprocess.PIPE,
                                 stdout=subprocess.PIPE)
      listing = ls_tree.communicate()[0]
      # The mode is the first whitespace-separated field of the listing.
      mode = oct(int(listing.split()[0], 8))
    else:
      mode = oct(os.stat(filename).st_mode)
    # Adjust python3 octal format so that it matches what git expects
    if mode.startswith('0o'):
      mode = '0' + mode[2:]
    return mode

  def index_info_generator():
    # Yield one "<mode> <blob>\t<filename>" entry per formatted file.
    try:
      entries = changed_lines.iteritems()  # Python 2
    except AttributeError:
      entries = changed_lines.items()  # Python 3
    for filename, line_ranges in entries:
      mode = file_mode(filename)
      blob_id = clang_format_to_blob(filename, line_ranges,
                                     revision=revision,
                                     binary=binary,
                                     style=style)
      yield '%s %s\t%s' % (mode, blob_id, filename)

  return create_tree(index_info_generator(), '--index-info')
377 | |
378 | |
def create_tree(input_lines, mode):
  """Write a tree object built from `input_lines` and return its ID.

  `mode` selects how `git update-index` reads its input: with '--stdin',
  `input_lines` must be filenames; with '--index-info', each item must be a
  line suitable for "git update-index --index-info", such as
  "<mode> <SP> <sha1> <TAB> <filename>". Any other mode is invalid.

  The index is populated in a temporary index file, so the user's own index
  is left alone."""
  assert mode in ('--stdin', '--index-info')
  cmd = ['git', 'update-index', '--add', '-z', mode]
  with temporary_index_file():
    update_index = subprocess.Popen(cmd, stdin=subprocess.PIPE)
    feed = update_index.stdin
    for entry in input_lines:
      # -z makes update-index expect NUL-terminated records.
      feed.write(to_bytes('%s\0' % entry))
    feed.close()
    status = update_index.wait()
    if status != 0:
      die('`%s` failed' % ' '.join(cmd))
    # write-tree must run while the temporary index is still in effect.
    return run('git', 'write-tree')
397 | |
398 | |
def clang_format_to_blob(filename, line_ranges, revision=None,
                         binary='clang-format', style=None):
  """Run clang-format on the given file and save the result to a git blob.

  Runs on the file in `revision` if not None, or on the file in the working
  directory if `revision` is None.

  `line_ranges` is an iterable of (start_line, line_count) pairs; each one is
  passed to clang-format as a `-lines=<first>:<last>` option. `binary` is the
  clang-format executable and `style`, if set, is passed as `-style=`.

  Exits through die() if the executable cannot be found or if any of the
  involved commands fails.

  Returns the object ID (SHA-1) of the created blob."""
  clang_format_cmd = [binary]
  if style:
    clang_format_cmd.extend(['-style='+style])
  # Convert each (start, count) pair into an inclusive first:last range.
  clang_format_cmd.extend([
      '-lines=%s:%s' % (start_line, start_line+line_count-1)
      for start_line, line_count in line_ranges])
  if revision:
    # The content is piped in from git, so clang-format is only told the
    # file's name rather than being given a path to read.
    clang_format_cmd.extend(['-assume-filename='+filename])
    git_show_cmd = ['git', 'cat-file', 'blob', '%s:%s' % (revision, filename)]
    git_show = subprocess.Popen(git_show_cmd, stdin=subprocess.PIPE,
                                stdout=subprocess.PIPE)
    git_show.stdin.close()
    clang_format_stdin = git_show.stdout
  else:
    # clang-format reads the working-directory file itself.
    clang_format_cmd.extend([filename])
    git_show = None
    clang_format_stdin = subprocess.PIPE
  try:
    clang_format = subprocess.Popen(clang_format_cmd, stdin=clang_format_stdin,
                                    stdout=subprocess.PIPE)
    # When a fresh pipe was requested, swap the PIPE constant for the actual
    # file object so that it can be closed below.
    if clang_format_stdin == subprocess.PIPE:
      clang_format_stdin = clang_format.stdin
  except OSError as e:
    if e.errno == errno.ENOENT:
      die('cannot find executable "%s"' % binary)
    else:
      raise
  # Close our handle on clang-format's input (either its own stdin pipe or
  # our copy of git's stdout).
  clang_format_stdin.close()
  # Pipe clang-format's output straight into `git hash-object -w`, which
  # stores it as a blob and prints the blob's ID.
  hash_object_cmd = ['git', 'hash-object', '-w', '--path='+filename, '--stdin']
  hash_object = subprocess.Popen(hash_object_cmd, stdin=clang_format.stdout,
                                 stdout=subprocess.PIPE)
  clang_format.stdout.close()
  stdout = hash_object.communicate()[0]
  # Check the pipeline from its last stage back to its first.
  if hash_object.returncode != 0:
    die('`%s` failed' % ' '.join(hash_object_cmd))
  if clang_format.wait() != 0:
    die('`%s` failed' % ' '.join(clang_format_cmd))
  if git_show and git_show.wait() != 0:
    die('`%s` failed' % ' '.join(git_show_cmd))
  return convert_string(stdout).rstrip('\r\n')
447 | |
448 | |
@contextlib.contextmanager
def temporary_index_file(tree=None):
  """Context manager that points GIT_INDEX_FILE at a temporary index.

  The temporary index is created from `tree` (see create_temporary_index).
  On exit the previous value of GIT_INDEX_FILE is restored, or the variable
  is removed if it was not set before, and the temporary file is deleted."""
  index_path = create_temporary_index(tree)
  saved = os.environ.get('GIT_INDEX_FILE')
  os.environ['GIT_INDEX_FILE'] = index_path
  try:
    yield
  finally:
    if saved is not None:
      os.environ['GIT_INDEX_FILE'] = saved
    else:
      del os.environ['GIT_INDEX_FILE']
    os.remove(index_path)
464 | |
465 | |
def create_temporary_index(tree=None):
  """Create the temporary index file and return its path.

  The file lives in the git directory under `temp_index_basename`. It is
  filled from `tree` when one is given; otherwise an empty index is
  created."""
  git_dir = run('git', 'rev-parse', '--git-dir')
  index_path = os.path.join(git_dir, temp_index_basename)
  source = '--empty' if tree is None else tree
  run('git', 'read-tree', '--index-output=' + index_path, source)
  return index_path
477 | |
478 | |
def print_diff(old_tree, new_tree):
  """Show the user the diff from `old_tree` to `new_tree` on stdout."""
  # The porcelain 'diff' is used rather than the plumbing 'diff-tree' since a
  # person is going to read the output, and only the porcelain command does
  # nice things like color and pagination.
  #
  # `new_tree` holds nothing but the files that were modified, so without the
  # modified-only filter every unmodified file would be shown as deleted.
  cmd = ['git', 'diff', '--diff-filter=M', old_tree, new_tree, '--']
  subprocess.check_call(cmd)
490 | |
491 | |
def apply_changes(old_tree, new_tree, force=False, patch_mode=False):
  """Apply the changes in `new_tree` to the working directory.

  Bails (exit status 2) if there are unstaged changes in the affected files
  and not `force`. If `patch_mode`, runs `git checkout --patch` to select
  hunks interactively; otherwise the files are checked out from an index
  built from `new_tree`.

  Returns the list of files that differ between `old_tree` and `new_tree`."""
  diff_output = run('git', 'diff-tree', '--diff-filter=M', '-r', '-z',
                    '--name-only', old_tree, new_tree)
  # The names are NUL-terminated (-z); drop the empty pieces so that empty
  # output gives an empty list instead of [''].
  changed_files = [name for name in diff_output.split('\0') if name]
  # With no paths at all diff-files would inspect the whole working tree, so
  # only run the check when there is something to check.
  if not force and changed_files:
    # '--' keeps file names from being interpreted as options.
    unstaged_files = run('git', 'diff-files', '--name-status', '--',
                         *changed_files)
    if unstaged_files:
      print('The following files would be modified but '
            'have unstaged changes:', file=sys.stderr)
      print(unstaged_files, file=sys.stderr)
      print('Please commit, stage, or stash them first.', file=sys.stderr)
      sys.exit(2)
  if patch_mode:
    # In patch mode, we could just as well create an index from the new tree
    # and checkout from that, but then the user will be presented with a
    # message saying "Discard ... from worktree". Instead, we use the old
    # tree as the index and checkout from new_tree, which gives the slightly
    # better message, "Apply ... to index and worktree". This is not quite
    # right, since it won't be applied to the user's index, but oh well.
    with temporary_index_file(old_tree):
      subprocess.check_call(['git', 'checkout', '--patch', new_tree])
  else:
    with temporary_index_file(new_tree):
      run('git', 'checkout-index', '-a', '-f')
  return changed_files
522 | |
523 | |
def run(*args, **kwargs):
  """Run the command `args` and return its standard output as a string.

  Keyword arguments:
    stdin:   data to send to the command's standard input (default: nothing).
             Text is encoded as UTF-8, since the pipes are binary.
    verbose: if true (the default), prefix anything reported on stderr with a
             line naming the command, and report a non-zero exit status.
    strip:   if true (the default), strip trailing newlines from the output.

  Whatever the command wrote to stderr is forwarded to our stderr. If the
  command exits with a non-zero status, the program exits with status 2.

  Raises TypeError for any other keyword argument."""
  stdin = kwargs.pop('stdin', '')
  verbose = kwargs.pop('verbose', True)
  strip = kwargs.pop('strip', True)
  for name in kwargs:
    raise TypeError("run() got an unexpected keyword argument '%s'" % name)
  # communicate() needs bytes here; on Python 2 `str` already is bytes.
  if stdin is not None and not isinstance(stdin, bytes):
    stdin = stdin.encode('utf-8')
  p = subprocess.Popen(args, stdout=subprocess.PIPE, stderr=subprocess.PIPE,
                       stdin=subprocess.PIPE)
  stdout, stderr = p.communicate(input=stdin)

  stdout = convert_string(stdout)
  stderr = convert_string(stderr)

  if p.returncode == 0:
    if stderr:
      if verbose:
        print('`%s` printed to stderr:' % ' '.join(args), file=sys.stderr)
      print(stderr.rstrip(), file=sys.stderr)
    if strip:
      stdout = stdout.rstrip('\r\n')
    return stdout
  if verbose:
    print('`%s` returned %s' % (' '.join(args), p.returncode), file=sys.stderr)
  if stderr:
    print(stderr.rstrip(), file=sys.stderr)
  sys.exit(2)
550 | |
551 | |
def die(message):
  """Report `message` on stderr, prefixed with "error:", and exit with 2."""
  sys.stderr.write('error: %s\n' % (message,))
  raise SystemExit(2)
555 | |
556 | |
def to_bytes(str_input):
  """Return `str_input` as bytes; text is encoded as UTF-8."""
  # Bytes are passed through untouched; anything else gets encoded.
  return str_input if isinstance(str_input, bytes) else str_input.encode('utf-8')
562 | |
563 | |
def to_string(bytes_input):
  """Return `bytes_input` as a native `str`.

  A `str` is returned unchanged. Python 3 `bytes` are decoded as UTF-8, and
  Python 2 `unicode` is encoded as UTF-8 (where `str` is the byte type)."""
  if isinstance(bytes_input, str):
    return bytes_input
  if isinstance(bytes_input, bytes):
    # Only reachable on Python 3: on Python 2 bytes is str, handled above.
    return bytes_input.decode('utf-8')
  return bytes_input.encode('utf-8')
568 | |
569 | |
def convert_string(bytes_input):
  """Convert `bytes_input` to a native string.

  The input is decoded as UTF-8 and passed through to_string(). If it has no
  `decode` method (it is already a `str`) or the decoding fails, the result
  of `str(bytes_input)` is returned instead."""
  try:
    return to_string(bytes_input.decode('utf-8'))
  except (AttributeError, UnicodeError):
    # AttributeError: 'str' object has no attribute 'decode'.
    return str(bytes_input)
577 | |
# Run the tool only when this file is executed as a script, not when it is
# imported as a module.
if __name__ == '__main__':
  main()
580 | |