1 | # -*- coding: utf-8 -*- |
2 | # Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
3 | # See https://llvm.org/LICENSE.txt for license information. |
4 | # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
""" This module is responsible for parsing a compiler invocation. """
6 | |
7 | import re |
8 | import os |
9 | import collections |
10 | |
# Public names of this module (what `from <module> import *` exposes).
__all__ = [
    'split_command',
    'classify_source',
    'compiler_language',
]
12 | |
# Compiler options which are dropped when the compilation database is
# created. The `split_command` function consults this table to decide which
# parameters to throw away. The table is not exhaustive: `split_command`
# filters out a few more parameters on its own.
#
# Each key is an option name. The value is the number of arguments which
# follow the option and have to be skipped together with it.
IGNORED_FLAGS = {
    # Compile-only flag. Dropped because the creator of the compilation
    # database sets it explicitly.
    '-c': 0,

    # Dependency generation flags. Keeping them would lead to duplicate
    # entries in the output (entries which differ only in these flags).
    # Users reported longer execution times caused by such duplicates.
    '-MD': 0,
    '-MMD': 0,
    '-MG': 0,
    '-MP': 0,
    '-MF': 1,
    '-MT': 1,
    '-MQ': 1,

    # Linker options. The compilation database holds compilation commands
    # only, so the compiler would ignore these anyway. Dropping them makes
    # the output more readable.
    '-static': 0,
    '-shared': 0,
    '-s': 0,
    '-rdynamic': 0,
    '-l': 1,
    '-L': 1,
    '-u': 1,
    '-z': 1,
    '-T': 1,
    '-Xlinker': 1,
}
49 | |
# Name patterns of the known C/C++ compiler executables. `compiler_language`
# matches them against the base name of the executed program.
COMPILER_PATTERNS = frozenset(
    re.compile(pattern) for pattern in (
        # cc, c++ and the same names with intercept- or analyze- prefix
        r'^(intercept-|analyze-|)c(c|\+\+)$',
        # [prefix-]*{m,g}{cc,++} with an optional -N[.N[.N]] version suffix
        r'^([^-]*-)*[mg](cc|\+\+)(-\d+(\.\d+){0,2})?$',
        # [prefix-]*clang[++] with an optional -N[.N[.N]] version suffix
        r'^([^-]*-)*clang(\+\+)?(-\d+(\.\d+){0,2})?$',
        # llvm-gcc and llvm-g++
        r'^llvm-g(cc|\+\+)$',
    )
)
57 | |
58 | |
# Result type of `split_command`. Created once here, instead of building a
# new class on every call.
_Compilation = collections.namedtuple('Compilation',
                                      ['compiler', 'flags', 'files'])


def split_command(command):
    """ Returns a value when the command is a compilation, None otherwise.

    The command is a list of strings: the executable followed by its
    arguments. None is returned when the executable is not a known C/C++
    compiler, when the arguments request something else than a compilation
    (eg.: '-E', '-S', '-M'), or when no source file was found among the
    arguments.

    The value on success is a named tuple with the following attributes:

    compiler:   string value of 'c' or 'c++'
    flags:      list of compile options
    files:      list of source files """

    language = compiler_language(command)
    # quit right now, if the program was not a C/C++ compiler
    if not language:
        return None

    flags = []
    files = []
    # iterate on the compile options
    args = iter(command[1:])
    for arg in args:
        # quit when compilation pass is not involved
        if arg in {'-E', '-S', '-cc1', '-M', '-MM', '-###'}:
            return None
        # ignore some flags, together with the arguments which belong to
        # them. (the default value of `next` protects against a truncated
        # command line, where StopIteration would escape otherwise.)
        elif arg in IGNORED_FLAGS:
            for _ in range(IGNORED_FLAGS[arg]):
                next(args, None)
        # ignore linker related flags which are glued to their value
        elif re.match(r'^-(l|L|Wl,).+', arg):
            pass
        # some parameters could look like filename, take as compile option
        elif arg in {'-D', '-I'}:
            flags.append(arg)
            value = next(args, None)
            # a trailing '-D' or '-I' has no value, keep the flag alone
            if value is not None:
                flags.append(value)
        # parameter which looks source file is taken...
        elif re.match(r'^[^-].+', arg) and classify_source(arg):
            files.append(arg)
        # and consider everything else as compile option.
        else:
            flags.append(arg)
    # do extra check on number of source files
    if not files:
        return None
    return _Compilation(compiler=language, flags=flags, files=files)
101 | |
102 | |
def classify_source(filename, c_compiler=True):
    """ Look up the source language which belongs to the file extension.

    filename:   name (or path) of the file, only the extension of the last
                path component is considered.
    c_compiler: decides how the '.c' and '.i' extensions are classified,
                as C when true, as C++ otherwise.

    Returns the language name as string, or None for unknown extension. """

    # only these two entries depend on the kind of the compiler
    if c_compiler:
        plain_source, preprocessed_source = 'c', 'c-cpp-output'
    else:
        plain_source, preprocessed_source = 'c++', 'c++-cpp-output'

    languages = {
        '.c': plain_source,
        '.i': preprocessed_source,
        '.ii': 'c++-cpp-output',
        '.m': 'objective-c',
        '.mi': 'objective-c-cpp-output',
        '.mm': 'objective-c++',
        '.mii': 'objective-c++-cpp-output',
        '.C': 'c++',
        '.cc': 'c++',
        '.CC': 'c++',
        '.cp': 'c++',
        '.cpp': 'c++',
        '.cxx': 'c++',
        '.c++': 'c++',
        '.C++': 'c++',
        '.txx': 'c++',
    }

    leaf = os.path.basename(filename)
    suffix = os.path.splitext(leaf)[1]
    return languages.get(suffix)
127 | |
128 | |
def compiler_language(command):
    """ Decide whether the command is a compiler call, and of which language.

    command:    list of strings, where the first element is the executable.

    Returns 'c' or 'c++' when the base name of the executable matches one
    of the COMPILER_PATTERNS. Returns None otherwise, and also when the
    command is empty. """

    if not command:
        return None

    program = os.path.basename(command[0])
    for known in COMPILER_PATTERNS:
        if known.match(program):
            # a '++' in the name (optionally followed by a dash separated
            # suffix) makes it a C++ compiler
            is_cplusplus = re.match(r'^(.+)(\+\+)(-.+|)$', program)
            return 'c++' if is_cplusplus else 'c'
    return None
141 | |