dump_ast_matchers.py source code [clang_source_code/docs/tools/dump_ast

1	#!/usr/bin/env python
2	# A tool to parse ASTMatchers.h and update the documentation in
3	# ../LibASTMatchersReference.html automatically. Run from the
4	# directory in which this file is located to update the docs.
5
6	import collections
7	import re
8	try:
9	from urllib.request import urlopen
10	except ImportError:
11	from urllib2 import urlopen
12
# Header that is scanned for matcher definitions (path relative to the
# directory this script is run from).
MATCHERS_FILE = '../../include/clang/ASTMatchers/ASTMatchers.h'

# Each matcher is documented in one row of the form:
#   result | name | argA
# The subsequent row contains the documentation and is hidden by default,
# becoming visible via javascript when the user clicks the matcher name.
TD_TEMPLATE="""
<tr><td>%(result)s</td><td class="name" onclick="toggle('%(id)s')"><a name="%(id)sAnchor">%(name)s</a></td><td>%(args)s</td></tr>
<tr><td colspan="4" class="doc" id="%(id)s"><pre>%(comment)s</pre></td></tr>
"""

# We categorize the matchers into these three categories in the reference.
# Each dict maps a sort key to the rendered html rows of one matcher.
node_matchers = {}
narrowing_matchers = {}
traversal_matchers = {}

# We output multiple rows per matcher if the matcher can be used on multiple
# node types. Thus, we need a new id per row to control the documentation
# pop-up. ids[name] keeps track of those ids.
ids = collections.defaultdict(int)

# Cache for doxygen urls we have already verified. Maps the url to True if
# it could be opened and to False otherwise.
doxygen_probes = {}
36
def esc(text):
    """Escape any html in the given text and link known AST node classes.

    The characters '&', '<' and '>' are replaced by their HTML entities.
    Afterwards each escaped 'Matcher<Name>' occurrence is turned into a link
    to the doxygen page of clang::Name, provided that page can be opened.
    Probe results are kept in the module-level doxygen_probes cache so every
    url is requested at most once.

    Args:
      text: the raw text to embed into the html reference.

    Returns:
      The escaped (and possibly linked) text.
    """
    # '&' has to be handled first; otherwise the entities produced for '<'
    # and '>' would be escaped a second time.
    text = text.replace('&', '&amp;')
    text = text.replace('<', '&lt;')
    text = text.replace('>', '&gt;')

    def link_if_exists(m):
        """Return the matched text, wrapped in a link if its page exists."""
        name = m.group(1)
        url = 'https://clang.llvm.org/doxygen/classclang_1_1%s.html' % name
        if url not in doxygen_probes:
            try:
                print('Probing %s...' % url)
                urlopen(url)
                doxygen_probes[url] = True
            except Exception:
                # Best effort: whatever went wrong while probing, the name
                # is simply left unlinked.
                doxygen_probes[url] = False
        if doxygen_probes[url]:
            return r'Matcher&lt;<a href="%s">%s</a>&gt;' % (url, name)
        return m.group(0)

    # The text is already escaped at this point, so the angle brackets
    # around the node type show up as entities.
    return re.sub(r'Matcher&lt;([^\*&]+)&gt;', link_if_exists, text)
59
def extract_result_types(comment):
    """Extracts a list of result types from the given comment.

    We allow annotations in the comment of the matcher to specify what
    nodes a matcher can match on. Those comments have the form:
      Usable as: Any Matcher | (Matcher<T1>[, Matcher<T2>[, ...]])

    The annotation has to be the last thing in the comment.

    Returns ['*'] in case of 'Any Matcher', or the listed types. The types
    are collected from the end of the comment towards the front, so the
    result is ['T2', 'T1'] for 'Usable as: Matcher<T1>, Matcher<T2>'.
    Returns the empty list if 'Usable as:' is not followed by any matcher,
    and None if no 'Usable as' specification could be parsed.
    """
    result_types = []
    m = re.search(r'Usable as: Any Matcher[\s\n]*$', comment, re.S)
    if m:
        return ['*']
    while True:
        # Peel the last 'Matcher<T>' off the end of the comment; group 1 is
        # everything in front of it and is examined in the next round.
        m = re.match(r'^(.*)Matcher<([^>]+)>\s*,?[\s\n]*$', comment, re.S)
        if not m:
            # Only accept the collected types if what is left over ends in
            # the 'Usable as:' marker.
            if re.search(r'Usable as:\s*$', comment):
                return result_types
            return None
        result_types.append(m.group(2))
        comment = m.group(1)
84
def strip_doxygen(comment):
    """Return the given comment with its doxygen backslash commands removed."""
    rewrites = (
        # If there is only a doxygen keyword in the line, delete the whole
        # line.
        (r'^\\[^\s]+\n', r'', re.M),
        # A doxygen 'see' command gets its prefix changed into "See also:".
        # FIXME: it would be better to turn this into a link to the target
        # instead.
        (r'\\see', r'See also:', 0),
        # Any remaining doxygen command is deleted together with the
        # whitespace that follows it.
        (r'\\[^\s]+\s+', r'', 0),
    )
    for pattern, replacement, flags in rewrites:
        comment = re.sub(pattern, replacement, comment, flags=flags)
    return comment
97
def unify_arguments(args):
    """Gets rid of anything the user doesn't care about in the argument list.

    Args:
      args: the argument list of a matcher as written in the declaration.

    Returns:
      The argument list without 'internal::' qualifiers and reference
      markers, and with bare template parameters (M, M1, M2, ...) spelled
      as 'Matcher<*>'.
    """
    args = re.sub(r'internal::', r'', args)
    args = re.sub(r'extern const\s+(.*)&', r'\1 ', args)
    args = re.sub(r'&', r' ', args)
    # A type called M (optionally followed by one digit) that stands on its
    # own, i.e. at the start or after whitespace, is a template parameter
    # accepting any matcher.
    args = re.sub(r'(^|\s)M\d?(\s)', r'\1Matcher<*>\2', args)
    return args
105
def add_matcher(result_type, name, args, comment, is_dyncast=False):
    """Render a matcher as html rows and file it under one of our categories.

    Args:
      result_type: node type the matcher can be used on.
      name: name of the matcher.
      args: argument list of the matcher as found in its declaration.
      comment: documentation comment of the matcher.
      is_dyncast: if true, the matcher is filed as a node matcher.
    """
    # FIXME: Figure out whether we want to support the 'id' matcher.
    if name == 'id':
        return

    # Every row rendered for the same matcher name gets the next number.
    row_number = ids[name]
    ids[name] = row_number + 1

    args = unify_arguments(args)
    row_fields = {}
    row_fields['result'] = esc('Matcher<%s>' % result_type)
    row_fields['name'] = name
    row_fields['args'] = esc(args)
    row_fields['comment'] = esc(strip_doxygen(comment))
    row_fields['id'] = '%s%d' % (name, row_number)
    matcher_html = TD_TEMPLATE % row_fields

    if is_dyncast:
        node_matchers[result_type + name] = matcher_html
        return

    # Use a heuristic to figure out whether a matcher is a narrowing or
    # traversal matcher. By default, matchers that take other matchers as
    # arguments (and are not node matchers) do traversal. We specifically
    # exclude known narrowing matchers that also take other matchers as
    # arguments.
    takes_no_matcher = 'Matcher<' not in args
    if takes_no_matcher or name in ('allOf', 'anyOf', 'anything', 'unless'):
        category = narrowing_matchers
    else:
        category = traversal_matchers
    category[result_type + name + esc(args)] = matcher_html
133
def act_on_decl(declaration, comment, allowed_types):
    """Parse the matcher out of the given declaration and comment.

    The known ways of declaring a matcher are tried one after the other; the
    first pattern that fits registers the matcher through add_matcher().
    A declaration that fits none of them is reported on stdout. A blank
    declaration is ignored.

    Args:
      declaration: source text of the declaration.
      comment: documentation comment belonging to the declaration.
      allowed_types: if set, a list of node types the matcher can match on,
        as extracted from the static type asserts in the matcher definition.

    Raises:
      Exception: if the result types of the declaration disagree with the
        ones documented in the comment or given in allowed_types, if a
        matcher macro carries a suffix other than none or '2', or if an
        AST_MATCHER without an explicit type comes without allowed_types.
    """
    if not declaration.strip():
        return

    # Node matchers are defined by writing:
    #   VariadicDynCastAllOfMatcher<ResultType, ArgumentType> name;
    m = re.match(r""".*Variadic(?:DynCast)?AllOfMatcher\s*<
                       \s*([^\s,]+)\s*(?:,
                       \s*([^\s>]+)\s*)?>
                       \s*([^\s;]+)\s*;\s*$""", declaration, flags=re.X)
    if m:
        result, inner, name = m.groups()
        if not inner:
            inner = result
        add_matcher(result, name, 'Matcher<%s>...' % inner,
                    comment, is_dyncast=True)
        return

    # Special case of type matchers:
    #   AstTypeMatcher<ArgumentType> name
    m = re.match(r""".*AstTypeMatcher\s*<
                       \s*([^\s>]+)\s*>
                       \s*([^\s;]+)\s*;\s*$""", declaration, flags=re.X)
    if m:
        inner, name = m.groups()
        add_matcher('Type', name, 'Matcher<%s>...' % inner,
                    comment, is_dyncast=True)
        # FIXME: re-enable once we have implemented casting on the TypeLoc
        # hierarchy.
        # add_matcher('TypeLoc', '%sLoc' % name, 'Matcher<%sLoc>...' % inner,
        #             comment, is_dyncast=True)
        return

    # Parse the various matcher definition macros.
    m = re.match(r""".*AST_TYPE(LOC)?_TRAVERSE_MATCHER(?:_DECL)?\(
                       \s*([^\s,]+\s*),
                       \s*(?:[^\s,]+\s*),
                       \s*AST_POLYMORPHIC_SUPPORTED_TYPES\(([^)]*)\)
                     \)\s*;\s*$""", declaration, flags=re.X)
    if m:
        _loc, name, results = m.groups()[0:3]
        result_types = [r.strip() for r in results.split(',')]

        comment_result_types = extract_result_types(comment)
        if (comment_result_types and
                sorted(result_types) != sorted(comment_result_types)):
            raise Exception('Inconsistent documentation for: %s' % name)
        for result_type in result_types:
            add_matcher(result_type, name, 'Matcher<Type>', comment)
            # if loc:
            #   add_matcher('%sLoc' % result_type, '%sLoc' % name,
            #               'Matcher<TypeLoc>', comment)
        return

    m = re.match(r"""^\s*AST_POLYMORPHIC_MATCHER(_P)?(.?)(?:_OVERLOAD)?\(
                          \s*([^\s,]+)\s*,
                          \s*AST_POLYMORPHIC_SUPPORTED_TYPES\(([^)]*)\)
                       (?:,\s*([^\s,]+)\s*
                          ,\s*([^\s,]+)\s*)?
                       (?:,\s*([^\s,]+)\s*
                          ,\s*([^\s,]+)\s*)?
                       (?:,\s*\d+\s*)?
                      \)\s*{\s*$""", declaration, flags=re.X)
    if m:
        _p, n, name, results = m.groups()[0:4]
        args = m.groups()[4:]
        result_types = [r.strip() for r in results.split(',')]
        if allowed_types and allowed_types != result_types:
            raise Exception('Inconsistent documentation for: %s' % name)
        if n not in ['', '2']:
            raise Exception('Cannot parse "%s"' % declaration)
        # The remaining groups come in (type, name) pairs; pairs that did
        # not take part in the match are None and are skipped.
        args = ', '.join('%s %s' % (args[i], args[i+1])
                         for i in range(0, len(args), 2) if args[i])
        for result_type in result_types:
            add_matcher(result_type, name, args, comment)
        return

    m = re.match(r"""^\s*AST_MATCHER_FUNCTION(_P)?(.?)(?:_OVERLOAD)?\(
                       (?:\s*([^\s,]+)\s*,)?
                          \s*([^\s,]+)\s*
                       (?:,\s*([^\s,]+)\s*
                          ,\s*([^\s,]+)\s*)?
                       (?:,\s*([^\s,]+)\s*
                          ,\s*([^\s,]+)\s*)?
                       (?:,\s*\d+\s*)?
                      \)\s*{\s*$""", declaration, flags=re.X)
    if m:
        _p, n, result, name = m.groups()[0:4]
        args = m.groups()[4:]
        if n not in ['', '2']:
            raise Exception('Cannot parse "%s"' % declaration)
        args = ', '.join('%s %s' % (args[i], args[i+1])
                         for i in range(0, len(args), 2) if args[i])
        add_matcher(result, name, args, comment)
        return

    m = re.match(r"""^\s*AST_MATCHER(_P)?(.?)(?:_OVERLOAD)?\(
                       (?:\s*([^\s,]+)\s*,)?
                          \s*([^\s,]+)\s*
                       (?:,\s*([^,]+)\s*
                          ,\s*([^\s,]+)\s*)?
                       (?:,\s*([^\s,]+)\s*
                          ,\s*([^\s,]+)\s*)?
                       (?:,\s*\d+\s*)?
                      \)\s*{""", declaration, flags=re.X)
    if m:
        _p, n, result, name = m.groups()[0:4]
        args = m.groups()[4:]
        if not result:
            # Without an explicit node type in the macro, the types have to
            # come from the asserts found in the matcher body.
            if not allowed_types:
                raise Exception(
                    'Did not find allowed result types for: %s' % name)
            result_types = allowed_types
        else:
            result_types = [result]
        if n not in ['', '2']:
            raise Exception('Cannot parse "%s"' % declaration)
        args = ', '.join('%s %s' % (args[i], args[i+1])
                         for i in range(0, len(args), 2) if args[i])
        for result_type in result_types:
            add_matcher(result_type, name, args, comment)
        return

    # Parse ArgumentAdapting matchers.
    m = re.match(
        r"""^.*ArgumentAdaptingMatcherFunc<.*>\s*
              ([a-zA-Z]*);$""",
        declaration, flags=re.X)
    if m:
        name = m.groups()[0]
        add_matcher('*', name, 'Matcher<*>', comment)
        return

    # Parse Variadic functions.
    m = re.match(
        r"""^.*internal::VariadicFunction\s*<\s*([^,]+),\s*([^,]+),\s*[^>]+>\s*
              ([a-zA-Z]*);$""",
        declaration, flags=re.X)
    if m:
        result, arg, name = m.groups()[:3]
        add_matcher(result, name, '%s, ..., %s' % (arg, arg), comment)
        return

    # Parse Variadic operator matchers.
    m = re.match(
        r"""^.*VariadicOperatorMatcherFunc\s*<\s*([^,]+),\s*([^\s]+)\s*>\s*
              ([a-zA-Z]*);$""",
        declaration, flags=re.X)
    if m:
        _min_args, max_args, name = m.groups()[:3]
        if max_args == '1':
            add_matcher('*', name, 'Matcher<*>', comment)
            return
        elif max_args == 'std::numeric_limits<unsigned>::max()':
            add_matcher('*', name, 'Matcher<*>, ..., Matcher<*>', comment)
            return
        # Any other upper bound falls through to the generic parsing below.

    # Parse free standing matcher functions, like:
    #   Matcher<ResultType> Name(Matcher<ArgumentType> InnerMatcher) {
    m = re.match(r"""^\s*(.*)\s+
                     ([^\s\(]+)\s*\(
                     (.*)
                     \)\s*{""", declaration, re.X)
    if not m:
        print('* Unparsable: "' + declaration + '" *')
        return

    result, name, args = m.groups()
    args = ', '.join(p.strip() for p in args.split(','))
    m = re.match(r'.*\s+internal::(Bindable)?Matcher<([^>]+)>$', result)
    if m:
        result_types = [m.group(2)]
    else:
        # The return type does not name the node type, so rely on the
        # 'Usable as:' annotation of the comment.
        result_types = extract_result_types(comment)
    if not result_types:
        if not comment:
            # Only overloads don't have their own doxygen comments; ignore
            # those.
            print('Ignoring "%s"' % name)
        else:
            print('Cannot determine result type for "%s"' % name)
        return
    for result_type in result_types:
        add_matcher(result_type, name, args, comment)
321
def sort_table(matcher_type, matcher_map):
    """Build the html table for the given row map, ordered by row key.

    The rows are enclosed in the START/END marker comments of the given
    matcher type.
    """
    rows = ''.join(matcher_map[key] + '\n' for key in sorted(matcher_map))
    layout = ('<!-- START_%(type)s_MATCHERS -->\n'
              '%(table)s'
              '<!--END_%(type)s_MATCHERS -->')
    return layout % {'type': matcher_type, 'table': rows}
333
# Parse the ast matchers.
# We alternate between two modes:
# body = True: We parse the definition of a matcher. We need
#   to parse the full definition before adding a matcher, as the
#   definition might contain static asserts that specify the result
#   type.
# body = False: We parse the comments and declaration of the matcher.
comment = ''
declaration = ''
allowed_types = []
body = False
with open(MATCHERS_FILE) as matchers_file:
    matcher_lines = matchers_file.read().splitlines()
for line in matcher_lines:
    if body:
        if line.strip() and line[0] == '}':
            # A closing brace in the first column ends the definition.
            if declaration:
                act_on_decl(declaration, comment, allowed_types)
                comment = ''
                declaration = ''
                allowed_types = []
            body = False
        else:
            # Collect the node types named by static asserts in the body.
            m = re.search(r'is_base_of<([^,]+), NodeType>', line)
            if m and m.group(1):
                allowed_types += [m.group(1)]
        continue
    if line.strip() and line.lstrip()[0] == '/':
        # Comment line: keep its text without the leading slashes.
        comment += re.sub(r'^/+\s?', '', line) + '\n'
    else:
        declaration += ' ' + line
        # A declaration is complete at a blank line, at a ';' or at the '{'
        # that opens its body (but not at the '{' of an '= {' initializer).
        if ((not line.strip()) or
                line.rstrip()[-1] == ';' or
                (line.rstrip()[-1] == '{' and line.rstrip()[-3:] != '= {')):
            if line.strip() and line.rstrip()[-1] == '{':
                body = True
            else:
                act_on_decl(declaration, comment, allowed_types)
                comment = ''
                declaration = ''
                allowed_types = []

node_matcher_table = sort_table('DECL', node_matchers)
narrowing_matcher_table = sort_table('NARROWING', narrowing_matchers)
traversal_matcher_table = sort_table('TRAVERSAL', traversal_matchers)

with open('../LibASTMatchersReference.html') as reference_file:
    reference = reference_file.read()
# The tables are inserted through callables: a replacement passed to re.sub
# as a string would have its backslashes interpreted as escape sequences.
reference = re.sub(r'<!-- START_DECL_MATCHERS.*END_DECL_MATCHERS -->',
                   lambda _: node_matcher_table, reference, flags=re.S)
reference = re.sub(r'<!-- START_NARROWING_MATCHERS.*END_NARROWING_MATCHERS -->',
                   lambda _: narrowing_matcher_table, reference, flags=re.S)
reference = re.sub(r'<!-- START_TRAVERSAL_MATCHERS.*END_TRAVERSAL_MATCHERS -->',
                   lambda _: traversal_matcher_table, reference, flags=re.S)

# The output file is opened in binary mode, which only accepts bytes. On
# Python 3 the text read above is a str and has to be encoded first; on
# Python 2 it already is a byte string and is written unchanged.
if not isinstance(reference, bytes):
    reference = reference.encode('utf-8')
with open('../LibASTMatchersReference.html', 'wb') as output:
    output.write(reference)
388
389

Clang Project