1 | // Copyright 2013 The Go Authors. All rights reserved. |
---|---|
2 | // Use of this source code is governed by a BSD-style |
3 | // license that can be found in the LICENSE file. |
4 | |
5 | package astutil |
6 | |
7 | // This file defines utilities for working with source positions. |
8 | |
9 | import ( |
10 | "fmt" |
11 | "go/ast" |
12 | "go/token" |
13 | "sort" |
14 | |
15 | "golang.org/x/tools/internal/typeparams" |
16 | ) |
17 | |
18 | // PathEnclosingInterval returns the node that encloses the source |
19 | // interval [start, end), and all its ancestors up to the AST root. |
20 | // |
21 | // The definition of "enclosing" used by this function considers |
22 | // additional whitespace abutting a node to be enclosed by it. |
23 | // In this example: |
24 | // |
25 | // z := x + y // add them |
26 | // <-A-> |
27 | // <----B-----> |
28 | // |
29 | // the ast.BinaryExpr(+) node is considered to enclose interval B |
30 | // even though its [Pos()..End()) is actually only interval A. |
31 | // This behaviour makes user interfaces more tolerant of imperfect |
32 | // input. |
33 | // |
34 | // This function treats tokens as nodes, though they are not included |
35 | // in the result. e.g. PathEnclosingInterval("+") returns the |
36 | // enclosing ast.BinaryExpr("x + y"). |
37 | // |
38 | // If start==end, the 1-char interval following start is used instead. |
39 | // |
40 | // The 'exact' result is true if the interval contains only path[0] |
41 | // and perhaps some adjacent whitespace. It is false if the interval |
42 | // overlaps multiple children of path[0], or if it contains only |
43 | // interior whitespace of path[0]. |
44 | // In this example: |
45 | // |
46 | // z := x + y // add them |
47 | // <--C--> <---E--> |
48 | // ^ |
49 | // D |
50 | // |
51 | // intervals C, D and E are inexact. C is contained by the |
52 | // z-assignment statement, because it spans three of its children (:=, |
53 | // x, +). So too is the 1-char interval D, because it contains only |
54 | // interior whitespace of the assignment. E is considered interior |
55 | // whitespace of the BlockStmt containing the assignment. |
56 | // |
57 | // The resulting path is never empty; it always contains at least the |
58 | // 'root' *ast.File. Ideally PathEnclosingInterval would reject |
59 | // intervals that lie wholly or partially outside the range of the |
60 | // file, but unfortunately ast.File records only the token.Pos of |
61 | // the 'package' keyword, but not of the start of the file itself. |
62 | func PathEnclosingInterval(root *ast.File, start, end token.Pos) (path []ast.Node, exact bool) { |
63 | // fmt.Printf("EnclosingInterval %d %d\n", start, end) // debugging |
64 | |
65 | // Precondition: node.[Pos..End) and adjoining whitespace contain [start, end). |
66 | var visit func(node ast.Node) bool |
67 | visit = func(node ast.Node) bool { |
68 | path = append(path, node) |
69 | |
70 | nodePos := node.Pos() |
71 | nodeEnd := node.End() |
72 | |
73 | // fmt.Printf("visit(%T, %d, %d)\n", node, nodePos, nodeEnd) // debugging |
74 | |
75 | // Intersect [start, end) with interval of node. |
76 | if start < nodePos { |
77 | start = nodePos |
78 | } |
79 | if end > nodeEnd { |
80 | end = nodeEnd |
81 | } |
82 | |
83 | // Find sole child that contains [start, end). |
84 | children := childrenOf(node) |
85 | l := len(children) |
86 | for i, child := range children { |
87 | // [childPos, childEnd) is unaugmented interval of child. |
88 | childPos := child.Pos() |
89 | childEnd := child.End() |
90 | |
91 | // [augPos, augEnd) is whitespace-augmented interval of child. |
92 | augPos := childPos |
93 | augEnd := childEnd |
94 | if i > 0 { |
95 | augPos = children[i-1].End() // start of preceding whitespace |
96 | } |
97 | if i < l-1 { |
98 | nextChildPos := children[i+1].Pos() |
99 | // Does [start, end) lie between child and next child? |
100 | if start >= augEnd && end <= nextChildPos { |
101 | return false // inexact match |
102 | } |
103 | augEnd = nextChildPos // end of following whitespace |
104 | } |
105 | |
106 | // fmt.Printf("\tchild %d: [%d..%d)\tcontains interval [%d..%d)?\n", |
107 | // i, augPos, augEnd, start, end) // debugging |
108 | |
109 | // Does augmented child strictly contain [start, end)? |
110 | if augPos <= start && end <= augEnd { |
111 | _, isToken := child.(tokenNode) |
112 | return isToken || visit(child) |
113 | } |
114 | |
115 | // Does [start, end) overlap multiple children? |
116 | // i.e. left-augmented child contains start |
117 | // but LR-augmented child does not contain end. |
118 | if start < childEnd && end > augEnd { |
119 | break |
120 | } |
121 | } |
122 | |
123 | // No single child contained [start, end), |
124 | // so node is the result. Is it exact? |
125 | |
126 | // (It's tempting to put this condition before the |
127 | // child loop, but it gives the wrong result in the |
128 | // case where a node (e.g. ExprStmt) and its sole |
129 | // child have equal intervals.) |
130 | if start == nodePos && end == nodeEnd { |
131 | return true // exact match |
132 | } |
133 | |
134 | return false // inexact: overlaps multiple children |
135 | } |
136 | |
137 | // Ensure [start,end) is nondecreasing. |
138 | if start > end { |
139 | start, end = end, start |
140 | } |
141 | |
142 | if start < root.End() && end > root.Pos() { |
143 | if start == end { |
144 | end = start + 1 // empty interval => interval of size 1 |
145 | } |
146 | exact = visit(root) |
147 | |
148 | // Reverse the path: |
149 | for i, l := 0, len(path); i < l/2; i++ { |
150 | path[i], path[l-1-i] = path[l-1-i], path[i] |
151 | } |
152 | } else { |
153 | // Selection lies within whitespace preceding the |
154 | // first (or following the last) declaration in the file. |
155 | // The result nonetheless always includes the ast.File. |
156 | path = append(path, root) |
157 | } |
158 | |
159 | return |
160 | } |
161 | |
162 | // tokenNode is a dummy implementation of ast.Node for a single token. |
163 | // They are used transiently by PathEnclosingInterval but never escape |
164 | // this package. |
165 | type tokenNode struct { |
166 | pos token.Pos |
167 | end token.Pos |
168 | } |
169 | |
170 | func (n tokenNode) Pos() token.Pos { |
171 | return n.pos |
172 | } |
173 | |
174 | func (n tokenNode) End() token.Pos { |
175 | return n.end |
176 | } |
177 | |
178 | func tok(pos token.Pos, len int) ast.Node { |
179 | return tokenNode{pos, pos + token.Pos(len)} |
180 | } |
181 | |
182 | // childrenOf returns the direct non-nil children of ast.Node n. |
183 | // It may include fake ast.Node implementations for bare tokens. |
184 | // it is not safe to call (e.g.) ast.Walk on such nodes. |
185 | func childrenOf(n ast.Node) []ast.Node { |
186 | var children []ast.Node |
187 | |
188 | // First add nodes for all true subtrees. |
189 | ast.Inspect(n, func(node ast.Node) bool { |
190 | if node == n { // push n |
191 | return true // recur |
192 | } |
193 | if node != nil { // push child |
194 | children = append(children, node) |
195 | } |
196 | return false // no recursion |
197 | }) |
198 | |
199 | // Then add fake Nodes for bare tokens. |
200 | switch n := n.(type) { |
201 | case *ast.ArrayType: |
202 | children = append(children, |
203 | tok(n.Lbrack, len("[")), |
204 | tok(n.Elt.End(), len("]"))) |
205 | |
206 | case *ast.AssignStmt: |
207 | children = append(children, |
208 | tok(n.TokPos, len(n.Tok.String()))) |
209 | |
210 | case *ast.BasicLit: |
211 | children = append(children, |
212 | tok(n.ValuePos, len(n.Value))) |
213 | |
214 | case *ast.BinaryExpr: |
215 | children = append(children, tok(n.OpPos, len(n.Op.String()))) |
216 | |
217 | case *ast.BlockStmt: |
218 | children = append(children, |
219 | tok(n.Lbrace, len("{")), |
220 | tok(n.Rbrace, len("}"))) |
221 | |
222 | case *ast.BranchStmt: |
223 | children = append(children, |
224 | tok(n.TokPos, len(n.Tok.String()))) |
225 | |
226 | case *ast.CallExpr: |
227 | children = append(children, |
228 | tok(n.Lparen, len("(")), |
229 | tok(n.Rparen, len(")"))) |
230 | if n.Ellipsis != 0 { |
231 | children = append(children, tok(n.Ellipsis, len("..."))) |
232 | } |
233 | |
234 | case *ast.CaseClause: |
235 | if n.List == nil { |
236 | children = append(children, |
237 | tok(n.Case, len("default"))) |
238 | } else { |
239 | children = append(children, |
240 | tok(n.Case, len("case"))) |
241 | } |
242 | children = append(children, tok(n.Colon, len(":"))) |
243 | |
244 | case *ast.ChanType: |
245 | switch n.Dir { |
246 | case ast.RECV: |
247 | children = append(children, tok(n.Begin, len("<-chan"))) |
248 | case ast.SEND: |
249 | children = append(children, tok(n.Begin, len("chan<-"))) |
250 | case ast.RECV | ast.SEND: |
251 | children = append(children, tok(n.Begin, len("chan"))) |
252 | } |
253 | |
254 | case *ast.CommClause: |
255 | if n.Comm == nil { |
256 | children = append(children, |
257 | tok(n.Case, len("default"))) |
258 | } else { |
259 | children = append(children, |
260 | tok(n.Case, len("case"))) |
261 | } |
262 | children = append(children, tok(n.Colon, len(":"))) |
263 | |
264 | case *ast.Comment: |
265 | // nop |
266 | |
267 | case *ast.CommentGroup: |
268 | // nop |
269 | |
270 | case *ast.CompositeLit: |
271 | children = append(children, |
272 | tok(n.Lbrace, len("{")), |
273 | tok(n.Rbrace, len("{"))) |
274 | |
275 | case *ast.DeclStmt: |
276 | // nop |
277 | |
278 | case *ast.DeferStmt: |
279 | children = append(children, |
280 | tok(n.Defer, len("defer"))) |
281 | |
282 | case *ast.Ellipsis: |
283 | children = append(children, |
284 | tok(n.Ellipsis, len("..."))) |
285 | |
286 | case *ast.EmptyStmt: |
287 | // nop |
288 | |
289 | case *ast.ExprStmt: |
290 | // nop |
291 | |
292 | case *ast.Field: |
293 | // TODO(adonovan): Field.{Doc,Comment,Tag}? |
294 | |
295 | case *ast.FieldList: |
296 | children = append(children, |
297 | tok(n.Opening, len("(")), // or len("[") |
298 | tok(n.Closing, len(")"))) // or len("]") |
299 | |
300 | case *ast.File: |
301 | // TODO test: Doc |
302 | children = append(children, |
303 | tok(n.Package, len("package"))) |
304 | |
305 | case *ast.ForStmt: |
306 | children = append(children, |
307 | tok(n.For, len("for"))) |
308 | |
309 | case *ast.FuncDecl: |
310 | // TODO(adonovan): FuncDecl.Comment? |
311 | |
312 | // Uniquely, FuncDecl breaks the invariant that |
313 | // preorder traversal yields tokens in lexical order: |
314 | // in fact, FuncDecl.Recv precedes FuncDecl.Type.Func. |
315 | // |
316 | // As a workaround, we inline the case for FuncType |
317 | // here and order things correctly. |
318 | // |
319 | children = nil // discard ast.Walk(FuncDecl) info subtrees |
320 | children = append(children, tok(n.Type.Func, len("func"))) |
321 | if n.Recv != nil { |
322 | children = append(children, n.Recv) |
323 | } |
324 | children = append(children, n.Name) |
325 | if tparams := typeparams.ForFuncType(n.Type); tparams != nil { |
326 | children = append(children, tparams) |
327 | } |
328 | if n.Type.Params != nil { |
329 | children = append(children, n.Type.Params) |
330 | } |
331 | if n.Type.Results != nil { |
332 | children = append(children, n.Type.Results) |
333 | } |
334 | if n.Body != nil { |
335 | children = append(children, n.Body) |
336 | } |
337 | |
338 | case *ast.FuncLit: |
339 | // nop |
340 | |
341 | case *ast.FuncType: |
342 | if n.Func != 0 { |
343 | children = append(children, |
344 | tok(n.Func, len("func"))) |
345 | } |
346 | |
347 | case *ast.GenDecl: |
348 | children = append(children, |
349 | tok(n.TokPos, len(n.Tok.String()))) |
350 | if n.Lparen != 0 { |
351 | children = append(children, |
352 | tok(n.Lparen, len("(")), |
353 | tok(n.Rparen, len(")"))) |
354 | } |
355 | |
356 | case *ast.GoStmt: |
357 | children = append(children, |
358 | tok(n.Go, len("go"))) |
359 | |
360 | case *ast.Ident: |
361 | children = append(children, |
362 | tok(n.NamePos, len(n.Name))) |
363 | |
364 | case *ast.IfStmt: |
365 | children = append(children, |
366 | tok(n.If, len("if"))) |
367 | |
368 | case *ast.ImportSpec: |
369 | // TODO(adonovan): ImportSpec.{Doc,EndPos}? |
370 | |
371 | case *ast.IncDecStmt: |
372 | children = append(children, |
373 | tok(n.TokPos, len(n.Tok.String()))) |
374 | |
375 | case *ast.IndexExpr: |
376 | children = append(children, |
377 | tok(n.Lbrack, len("[")), |
378 | tok(n.Rbrack, len("]"))) |
379 | |
380 | case *typeparams.IndexListExpr: |
381 | children = append(children, |
382 | tok(n.Lbrack, len("[")), |
383 | tok(n.Rbrack, len("]"))) |
384 | |
385 | case *ast.InterfaceType: |
386 | children = append(children, |
387 | tok(n.Interface, len("interface"))) |
388 | |
389 | case *ast.KeyValueExpr: |
390 | children = append(children, |
391 | tok(n.Colon, len(":"))) |
392 | |
393 | case *ast.LabeledStmt: |
394 | children = append(children, |
395 | tok(n.Colon, len(":"))) |
396 | |
397 | case *ast.MapType: |
398 | children = append(children, |
399 | tok(n.Map, len("map"))) |
400 | |
401 | case *ast.ParenExpr: |
402 | children = append(children, |
403 | tok(n.Lparen, len("(")), |
404 | tok(n.Rparen, len(")"))) |
405 | |
406 | case *ast.RangeStmt: |
407 | children = append(children, |
408 | tok(n.For, len("for")), |
409 | tok(n.TokPos, len(n.Tok.String()))) |
410 | |
411 | case *ast.ReturnStmt: |
412 | children = append(children, |
413 | tok(n.Return, len("return"))) |
414 | |
415 | case *ast.SelectStmt: |
416 | children = append(children, |
417 | tok(n.Select, len("select"))) |
418 | |
419 | case *ast.SelectorExpr: |
420 | // nop |
421 | |
422 | case *ast.SendStmt: |
423 | children = append(children, |
424 | tok(n.Arrow, len("<-"))) |
425 | |
426 | case *ast.SliceExpr: |
427 | children = append(children, |
428 | tok(n.Lbrack, len("[")), |
429 | tok(n.Rbrack, len("]"))) |
430 | |
431 | case *ast.StarExpr: |
432 | children = append(children, tok(n.Star, len("*"))) |
433 | |
434 | case *ast.StructType: |
435 | children = append(children, tok(n.Struct, len("struct"))) |
436 | |
437 | case *ast.SwitchStmt: |
438 | children = append(children, tok(n.Switch, len("switch"))) |
439 | |
440 | case *ast.TypeAssertExpr: |
441 | children = append(children, |
442 | tok(n.Lparen-1, len(".")), |
443 | tok(n.Lparen, len("(")), |
444 | tok(n.Rparen, len(")"))) |
445 | |
446 | case *ast.TypeSpec: |
447 | // TODO(adonovan): TypeSpec.{Doc,Comment}? |
448 | |
449 | case *ast.TypeSwitchStmt: |
450 | children = append(children, tok(n.Switch, len("switch"))) |
451 | |
452 | case *ast.UnaryExpr: |
453 | children = append(children, tok(n.OpPos, len(n.Op.String()))) |
454 | |
455 | case *ast.ValueSpec: |
456 | // TODO(adonovan): ValueSpec.{Doc,Comment}? |
457 | |
458 | case *ast.BadDecl, *ast.BadExpr, *ast.BadStmt: |
459 | // nop |
460 | } |
461 | |
462 | // TODO(adonovan): opt: merge the logic of ast.Inspect() into |
463 | // the switch above so we can make interleaved callbacks for |
464 | // both Nodes and Tokens in the right order and avoid the need |
465 | // to sort. |
466 | sort.Sort(byPos(children)) |
467 | |
468 | return children |
469 | } |
470 | |
471 | type byPos []ast.Node |
472 | |
473 | func (sl byPos) Len() int { |
474 | return len(sl) |
475 | } |
476 | func (sl byPos) Less(i, j int) bool { |
477 | return sl[i].Pos() < sl[j].Pos() |
478 | } |
479 | func (sl byPos) Swap(i, j int) { |
480 | sl[i], sl[j] = sl[j], sl[i] |
481 | } |
482 | |
483 | // NodeDescription returns a description of the concrete type of n suitable |
484 | // for a user interface. |
485 | // |
486 | // TODO(adonovan): in some cases (e.g. Field, FieldList, Ident, |
487 | // StarExpr) we could be much more specific given the path to the AST |
488 | // root. Perhaps we should do that. |
489 | func NodeDescription(n ast.Node) string { |
490 | switch n := n.(type) { |
491 | case *ast.ArrayType: |
492 | return "array type" |
493 | case *ast.AssignStmt: |
494 | return "assignment" |
495 | case *ast.BadDecl: |
496 | return "bad declaration" |
497 | case *ast.BadExpr: |
498 | return "bad expression" |
499 | case *ast.BadStmt: |
500 | return "bad statement" |
501 | case *ast.BasicLit: |
502 | return "basic literal" |
503 | case *ast.BinaryExpr: |
504 | return fmt.Sprintf("binary %s operation", n.Op) |
505 | case *ast.BlockStmt: |
506 | return "block" |
507 | case *ast.BranchStmt: |
508 | switch n.Tok { |
509 | case token.BREAK: |
510 | return "break statement" |
511 | case token.CONTINUE: |
512 | return "continue statement" |
513 | case token.GOTO: |
514 | return "goto statement" |
515 | case token.FALLTHROUGH: |
516 | return "fall-through statement" |
517 | } |
518 | case *ast.CallExpr: |
519 | if len(n.Args) == 1 && !n.Ellipsis.IsValid() { |
520 | return "function call (or conversion)" |
521 | } |
522 | return "function call" |
523 | case *ast.CaseClause: |
524 | return "case clause" |
525 | case *ast.ChanType: |
526 | return "channel type" |
527 | case *ast.CommClause: |
528 | return "communication clause" |
529 | case *ast.Comment: |
530 | return "comment" |
531 | case *ast.CommentGroup: |
532 | return "comment group" |
533 | case *ast.CompositeLit: |
534 | return "composite literal" |
535 | case *ast.DeclStmt: |
536 | return NodeDescription(n.Decl) + " statement" |
537 | case *ast.DeferStmt: |
538 | return "defer statement" |
539 | case *ast.Ellipsis: |
540 | return "ellipsis" |
541 | case *ast.EmptyStmt: |
542 | return "empty statement" |
543 | case *ast.ExprStmt: |
544 | return "expression statement" |
545 | case *ast.Field: |
546 | // Can be any of these: |
547 | // struct {x, y int} -- struct field(s) |
548 | // struct {T} -- anon struct field |
549 | // interface {I} -- interface embedding |
550 | // interface {f()} -- interface method |
551 | // func (A) func(B) C -- receiver, param(s), result(s) |
552 | return "field/method/parameter" |
553 | case *ast.FieldList: |
554 | return "field/method/parameter list" |
555 | case *ast.File: |
556 | return "source file" |
557 | case *ast.ForStmt: |
558 | return "for loop" |
559 | case *ast.FuncDecl: |
560 | return "function declaration" |
561 | case *ast.FuncLit: |
562 | return "function literal" |
563 | case *ast.FuncType: |
564 | return "function type" |
565 | case *ast.GenDecl: |
566 | switch n.Tok { |
567 | case token.IMPORT: |
568 | return "import declaration" |
569 | case token.CONST: |
570 | return "constant declaration" |
571 | case token.TYPE: |
572 | return "type declaration" |
573 | case token.VAR: |
574 | return "variable declaration" |
575 | } |
576 | case *ast.GoStmt: |
577 | return "go statement" |
578 | case *ast.Ident: |
579 | return "identifier" |
580 | case *ast.IfStmt: |
581 | return "if statement" |
582 | case *ast.ImportSpec: |
583 | return "import specification" |
584 | case *ast.IncDecStmt: |
585 | if n.Tok == token.INC { |
586 | return "increment statement" |
587 | } |
588 | return "decrement statement" |
589 | case *ast.IndexExpr: |
590 | return "index expression" |
591 | case *typeparams.IndexListExpr: |
592 | return "index list expression" |
593 | case *ast.InterfaceType: |
594 | return "interface type" |
595 | case *ast.KeyValueExpr: |
596 | return "key/value association" |
597 | case *ast.LabeledStmt: |
598 | return "statement label" |
599 | case *ast.MapType: |
600 | return "map type" |
601 | case *ast.Package: |
602 | return "package" |
603 | case *ast.ParenExpr: |
604 | return "parenthesized " + NodeDescription(n.X) |
605 | case *ast.RangeStmt: |
606 | return "range loop" |
607 | case *ast.ReturnStmt: |
608 | return "return statement" |
609 | case *ast.SelectStmt: |
610 | return "select statement" |
611 | case *ast.SelectorExpr: |
612 | return "selector" |
613 | case *ast.SendStmt: |
614 | return "channel send" |
615 | case *ast.SliceExpr: |
616 | return "slice expression" |
617 | case *ast.StarExpr: |
618 | return "*-operation" // load/store expr or pointer type |
619 | case *ast.StructType: |
620 | return "struct type" |
621 | case *ast.SwitchStmt: |
622 | return "switch statement" |
623 | case *ast.TypeAssertExpr: |
624 | return "type assertion" |
625 | case *ast.TypeSpec: |
626 | return "type specification" |
627 | case *ast.TypeSwitchStmt: |
628 | return "type switch" |
629 | case *ast.UnaryExpr: |
630 | return fmt.Sprintf("unary %s operation", n.Op) |
631 | case *ast.ValueSpec: |
632 | return "value specification" |
633 | |
634 | } |
635 | panic(fmt.Sprintf("unexpected node type: %T", n)) |
636 | } |
637 |
Members