1 | // Copyright 2019 The Go Authors. All rights reserved. |
---|---|
2 | // Use of this source code is governed by a BSD-style |
3 | // license that can be found in the LICENSE file. |
4 | |
5 | /* |
6 | The digraph command performs queries over unlabelled directed graphs |
7 | represented in text form. It is intended to integrate nicely with |
8 | typical UNIX command pipelines. |
9 | |
10 | Usage: |
11 | |
12 | your-application | digraph [command] |
13 | |
14 | The support commands are: |
15 | |
16 | nodes |
17 | the set of all nodes |
18 | degree |
19 | the in-degree and out-degree of each node |
20 | transpose |
21 | the reverse of the input edges |
22 | preds <node> ... |
23 | the set of immediate predecessors of the specified nodes |
24 | succs <node> ... |
25 | the set of immediate successors of the specified nodes |
26 | forward <node> ... |
27 | the set of nodes transitively reachable from the specified nodes |
28 | reverse <node> ... |
29 | the set of nodes that transitively reach the specified nodes |
30 | somepath <node> <node> |
31 | the list of nodes on some arbitrary path from the first node to the second |
32 | allpaths <node> <node> |
33 | the set of nodes on all paths from the first node to the second |
34 | sccs |
35 | all strongly connected components (one per line) |
36 | scc <node> |
37 | the set of nodes strongly connected to the specified one |
38 | focus <node> |
39 | the subgraph containing all directed paths that pass through the specified node |
40 | |
41 | Input format: |
42 | |
43 | Each line contains zero or more words. Words are separated by unquoted |
44 | whitespace; words may contain Go-style double-quoted portions, allowing spaces |
45 | and other characters to be expressed. |
46 | |
47 | Each word declares a node, and if there are more than one, an edge from the |
48 | first to each subsequent one. The graph is provided on the standard input. |
49 | |
50 | For instance, the following (acyclic) graph specifies a partial order among the |
51 | subtasks of getting dressed: |
52 | |
53 | $ cat clothes.txt |
54 | socks shoes |
55 | "boxer shorts" pants |
56 | pants belt shoes |
57 | shirt tie sweater |
58 | sweater jacket |
59 | hat |
60 | |
61 | The line "shirt tie sweater" indicates the two edges shirt -> tie and |
62 | shirt -> sweater, not shirt -> tie -> sweater. |
63 | |
64 | Example usage: |
65 | |
66 | Using digraph with existing Go tools: |
67 | |
68 | $ go mod graph | digraph nodes # Operate on the Go module graph. |
69 | $ go list -m all | digraph nodes # Operate on the Go package graph. |
70 | |
71 | Show the transitive closure of imports of the digraph tool itself: |
72 | |
73 | $ go list -f '{{.ImportPath}} {{join .Imports " "}}' ... | digraph forward golang.org/x/tools/cmd/digraph |
74 | |
75 | Show which clothes (see above) must be donned before a jacket: |
76 | |
77 | $ digraph reverse jacket |
78 | */ |
79 | package main // import "golang.org/x/tools/cmd/digraph" |
80 | |
81 | // TODO(adonovan): |
82 | // - support input files other than stdin |
83 | // - support alternative formats (AT&T GraphViz, CSV, etc), |
84 | // a comment syntax, etc. |
85 | // - allow queries to nest, like Blaze query language. |
86 | |
87 | import ( |
88 | "bufio" |
89 | "bytes" |
90 | "errors" |
91 | "flag" |
92 | "fmt" |
93 | "io" |
94 | "os" |
95 | "sort" |
96 | "strconv" |
97 | "strings" |
98 | "unicode" |
99 | "unicode/utf8" |
100 | ) |
101 | |
102 | func usage() { |
103 | fmt.Fprintf(os.Stderr, `Usage: your-application | digraph [command] |
104 | |
105 | The support commands are: |
106 | nodes |
107 | the set of all nodes |
108 | degree |
109 | the in-degree and out-degree of each node |
110 | transpose |
111 | the reverse of the input edges |
112 | preds <node> ... |
113 | the set of immediate predecessors of the specified nodes |
114 | succs <node> ... |
115 | the set of immediate successors of the specified nodes |
116 | forward <node> ... |
117 | the set of nodes transitively reachable from the specified nodes |
118 | reverse <node> ... |
119 | the set of nodes that transitively reach the specified nodes |
120 | somepath <node> <node> |
121 | the list of nodes on some arbitrary path from the first node to the second |
122 | allpaths <node> <node> |
123 | the set of nodes on all paths from the first node to the second |
124 | sccs |
125 | all non-trivial strongly connected components, one per line |
126 | (single-node components are only printed for nodes with self-loops) |
127 | scc <node> |
128 | the set of nodes nodes strongly connected to the specified one |
129 | focus <node> |
130 | the subgraph containing all directed paths that pass through the specified node |
131 | `) |
132 | os.Exit(2) |
133 | } |
134 | |
135 | func main() { |
136 | flag.Usage = usage |
137 | flag.Parse() |
138 | |
139 | args := flag.Args() |
140 | if len(args) == 0 { |
141 | usage() |
142 | } |
143 | |
144 | if err := digraph(args[0], args[1:]); err != nil { |
145 | fmt.Fprintf(os.Stderr, "digraph: %s\n", err) |
146 | os.Exit(1) |
147 | } |
148 | } |
149 | |
150 | type nodelist []string |
151 | |
152 | func (l nodelist) println(sep string) { |
153 | for i, node := range l { |
154 | if i > 0 { |
155 | fmt.Fprint(stdout, sep) |
156 | } |
157 | fmt.Fprint(stdout, node) |
158 | } |
159 | fmt.Fprintln(stdout) |
160 | } |
161 | |
162 | type nodeset map[string]bool |
163 | |
164 | func (s nodeset) sort() nodelist { |
165 | nodes := make(nodelist, len(s)) |
166 | var i int |
167 | for node := range s { |
168 | nodes[i] = node |
169 | i++ |
170 | } |
171 | sort.Strings(nodes) |
172 | return nodes |
173 | } |
174 | |
175 | func (s nodeset) addAll(x nodeset) { |
176 | for node := range x { |
177 | s[node] = true |
178 | } |
179 | } |
180 | |
181 | // A graph maps nodes to the non-nil set of their immediate successors. |
182 | type graph map[string]nodeset |
183 | |
184 | func (g graph) addNode(node string) nodeset { |
185 | edges := g[node] |
186 | if edges == nil { |
187 | edges = make(nodeset) |
188 | g[node] = edges |
189 | } |
190 | return edges |
191 | } |
192 | |
193 | func (g graph) addEdges(from string, to ...string) { |
194 | edges := g.addNode(from) |
195 | for _, to := range to { |
196 | g.addNode(to) |
197 | edges[to] = true |
198 | } |
199 | } |
200 | |
201 | func (g graph) reachableFrom(roots nodeset) nodeset { |
202 | seen := make(nodeset) |
203 | var visit func(node string) |
204 | visit = func(node string) { |
205 | if !seen[node] { |
206 | seen[node] = true |
207 | for e := range g[node] { |
208 | visit(e) |
209 | } |
210 | } |
211 | } |
212 | for root := range roots { |
213 | visit(root) |
214 | } |
215 | return seen |
216 | } |
217 | |
218 | func (g graph) transpose() graph { |
219 | rev := make(graph) |
220 | for node, edges := range g { |
221 | rev.addNode(node) |
222 | for succ := range edges { |
223 | rev.addEdges(succ, node) |
224 | } |
225 | } |
226 | return rev |
227 | } |
228 | |
229 | func (g graph) sccs() []nodeset { |
230 | // Kosaraju's algorithm---Tarjan is overkill here. |
231 | |
232 | // Forward pass. |
233 | S := make(nodelist, 0, len(g)) // postorder stack |
234 | seen := make(nodeset) |
235 | var visit func(node string) |
236 | visit = func(node string) { |
237 | if !seen[node] { |
238 | seen[node] = true |
239 | for e := range g[node] { |
240 | visit(e) |
241 | } |
242 | S = append(S, node) |
243 | } |
244 | } |
245 | for node := range g { |
246 | visit(node) |
247 | } |
248 | |
249 | // Reverse pass. |
250 | rev := g.transpose() |
251 | var scc nodeset |
252 | seen = make(nodeset) |
253 | var rvisit func(node string) |
254 | rvisit = func(node string) { |
255 | if !seen[node] { |
256 | seen[node] = true |
257 | scc[node] = true |
258 | for e := range rev[node] { |
259 | rvisit(e) |
260 | } |
261 | } |
262 | } |
263 | var sccs []nodeset |
264 | for len(S) > 0 { |
265 | top := S[len(S)-1] |
266 | S = S[:len(S)-1] // pop |
267 | if !seen[top] { |
268 | scc = make(nodeset) |
269 | rvisit(top) |
270 | if len(scc) == 1 && !g[top][top] { |
271 | continue |
272 | } |
273 | sccs = append(sccs, scc) |
274 | } |
275 | } |
276 | return sccs |
277 | } |
278 | |
279 | func (g graph) allpaths(from, to string) error { |
280 | // Mark all nodes to "to". |
281 | seen := make(nodeset) // value of seen[x] indicates whether x is on some path to "to" |
282 | var visit func(node string) bool |
283 | visit = func(node string) bool { |
284 | reachesTo, ok := seen[node] |
285 | if !ok { |
286 | reachesTo = node == to |
287 | seen[node] = reachesTo |
288 | for e := range g[node] { |
289 | if visit(e) { |
290 | reachesTo = true |
291 | } |
292 | } |
293 | if reachesTo && node != to { |
294 | seen[node] = true |
295 | } |
296 | } |
297 | return reachesTo |
298 | } |
299 | visit(from) |
300 | |
301 | // For each marked node, collect its marked successors. |
302 | var edges []string |
303 | for n := range seen { |
304 | for succ := range g[n] { |
305 | if seen[succ] { |
306 | edges = append(edges, n+" "+succ) |
307 | } |
308 | } |
309 | } |
310 | |
311 | // Sort (so that this method is deterministic) and print edges. |
312 | sort.Strings(edges) |
313 | for _, e := range edges { |
314 | fmt.Fprintln(stdout, e) |
315 | } |
316 | |
317 | return nil |
318 | } |
319 | |
320 | func (g graph) somepath(from, to string) error { |
321 | type edge struct{ from, to string } |
322 | seen := make(nodeset) |
323 | var dfs func(path []edge, from string) bool |
324 | dfs = func(path []edge, from string) bool { |
325 | if !seen[from] { |
326 | seen[from] = true |
327 | if from == to { |
328 | // fmt.Println(path, len(path), cap(path)) |
329 | // Print and unwind. |
330 | for _, e := range path { |
331 | fmt.Fprintln(stdout, e.from+" "+e.to) |
332 | } |
333 | return true |
334 | } |
335 | for e := range g[from] { |
336 | if dfs(append(path, edge{from: from, to: e}), e) { |
337 | return true |
338 | } |
339 | } |
340 | } |
341 | return false |
342 | } |
343 | maxEdgesInGraph := len(g) * (len(g) - 1) |
344 | if !dfs(make([]edge, 0, maxEdgesInGraph), from) { |
345 | return fmt.Errorf("no path from %q to %q", from, to) |
346 | } |
347 | return nil |
348 | } |
349 | |
350 | func parse(rd io.Reader) (graph, error) { |
351 | g := make(graph) |
352 | |
353 | var linenum int |
354 | in := bufio.NewScanner(rd) |
355 | for in.Scan() { |
356 | linenum++ |
357 | // Split into words, honoring double-quotes per Go spec. |
358 | words, err := split(in.Text()) |
359 | if err != nil { |
360 | return nil, fmt.Errorf("at line %d: %v", linenum, err) |
361 | } |
362 | if len(words) > 0 { |
363 | g.addEdges(words[0], words[1:]...) |
364 | } |
365 | } |
366 | if err := in.Err(); err != nil { |
367 | return nil, err |
368 | } |
369 | return g, nil |
370 | } |
371 | |
372 | // Overridable for redirection. |
373 | var stdin io.Reader = os.Stdin |
374 | var stdout io.Writer = os.Stdout |
375 | |
376 | func digraph(cmd string, args []string) error { |
377 | // Parse the input graph. |
378 | g, err := parse(stdin) |
379 | if err != nil { |
380 | return err |
381 | } |
382 | |
383 | // Parse the command line. |
384 | switch cmd { |
385 | case "nodes": |
386 | if len(args) != 0 { |
387 | return fmt.Errorf("usage: digraph nodes") |
388 | } |
389 | nodes := make(nodeset) |
390 | for node := range g { |
391 | nodes[node] = true |
392 | } |
393 | nodes.sort().println("\n") |
394 | |
395 | case "degree": |
396 | if len(args) != 0 { |
397 | return fmt.Errorf("usage: digraph degree") |
398 | } |
399 | nodes := make(nodeset) |
400 | for node := range g { |
401 | nodes[node] = true |
402 | } |
403 | rev := g.transpose() |
404 | for _, node := range nodes.sort() { |
405 | fmt.Fprintf(stdout, "%d\t%d\t%s\n", len(rev[node]), len(g[node]), node) |
406 | } |
407 | |
408 | case "transpose": |
409 | if len(args) != 0 { |
410 | return fmt.Errorf("usage: digraph transpose") |
411 | } |
412 | var revEdges []string |
413 | for node, succs := range g.transpose() { |
414 | for succ := range succs { |
415 | revEdges = append(revEdges, fmt.Sprintf("%s %s", node, succ)) |
416 | } |
417 | } |
418 | sort.Strings(revEdges) // make output deterministic |
419 | for _, e := range revEdges { |
420 | fmt.Fprintln(stdout, e) |
421 | } |
422 | |
423 | case "succs", "preds": |
424 | if len(args) == 0 { |
425 | return fmt.Errorf("usage: digraph %s <node> ... ", cmd) |
426 | } |
427 | g := g |
428 | if cmd == "preds" { |
429 | g = g.transpose() |
430 | } |
431 | result := make(nodeset) |
432 | for _, root := range args { |
433 | edges := g[root] |
434 | if edges == nil { |
435 | return fmt.Errorf("no such node %q", root) |
436 | } |
437 | result.addAll(edges) |
438 | } |
439 | result.sort().println("\n") |
440 | |
441 | case "forward", "reverse": |
442 | if len(args) == 0 { |
443 | return fmt.Errorf("usage: digraph %s <node> ... ", cmd) |
444 | } |
445 | roots := make(nodeset) |
446 | for _, root := range args { |
447 | if g[root] == nil { |
448 | return fmt.Errorf("no such node %q", root) |
449 | } |
450 | roots[root] = true |
451 | } |
452 | g := g |
453 | if cmd == "reverse" { |
454 | g = g.transpose() |
455 | } |
456 | g.reachableFrom(roots).sort().println("\n") |
457 | |
458 | case "somepath": |
459 | if len(args) != 2 { |
460 | return fmt.Errorf("usage: digraph somepath <from> <to>") |
461 | } |
462 | from, to := args[0], args[1] |
463 | if g[from] == nil { |
464 | return fmt.Errorf("no such 'from' node %q", from) |
465 | } |
466 | if g[to] == nil { |
467 | return fmt.Errorf("no such 'to' node %q", to) |
468 | } |
469 | if err := g.somepath(from, to); err != nil { |
470 | return err |
471 | } |
472 | |
473 | case "allpaths": |
474 | if len(args) != 2 { |
475 | return fmt.Errorf("usage: digraph allpaths <from> <to>") |
476 | } |
477 | from, to := args[0], args[1] |
478 | if g[from] == nil { |
479 | return fmt.Errorf("no such 'from' node %q", from) |
480 | } |
481 | if g[to] == nil { |
482 | return fmt.Errorf("no such 'to' node %q", to) |
483 | } |
484 | if err := g.allpaths(from, to); err != nil { |
485 | return err |
486 | } |
487 | |
488 | case "sccs": |
489 | if len(args) != 0 { |
490 | return fmt.Errorf("usage: digraph sccs") |
491 | } |
492 | buf := new(bytes.Buffer) |
493 | oldStdout := stdout |
494 | stdout = buf |
495 | for _, scc := range g.sccs() { |
496 | scc.sort().println(" ") |
497 | } |
498 | lines := strings.SplitAfter(buf.String(), "\n") |
499 | sort.Strings(lines) |
500 | stdout = oldStdout |
501 | io.WriteString(stdout, strings.Join(lines, "")) |
502 | |
503 | case "scc": |
504 | if len(args) != 1 { |
505 | return fmt.Errorf("usage: digraph scc <node>") |
506 | } |
507 | node := args[0] |
508 | if g[node] == nil { |
509 | return fmt.Errorf("no such node %q", node) |
510 | } |
511 | for _, scc := range g.sccs() { |
512 | if scc[node] { |
513 | scc.sort().println("\n") |
514 | break |
515 | } |
516 | } |
517 | |
518 | case "focus": |
519 | if len(args) != 1 { |
520 | return fmt.Errorf("usage: digraph focus <node>") |
521 | } |
522 | node := args[0] |
523 | if g[node] == nil { |
524 | return fmt.Errorf("no such node %q", node) |
525 | } |
526 | |
527 | edges := make(map[string]struct{}) |
528 | for from := range g.reachableFrom(nodeset{node: true}) { |
529 | for to := range g[from] { |
530 | edges[fmt.Sprintf("%s %s", from, to)] = struct{}{} |
531 | } |
532 | } |
533 | |
534 | gtrans := g.transpose() |
535 | for from := range gtrans.reachableFrom(nodeset{node: true}) { |
536 | for to := range gtrans[from] { |
537 | edges[fmt.Sprintf("%s %s", to, from)] = struct{}{} |
538 | } |
539 | } |
540 | |
541 | edgesSorted := make([]string, 0, len(edges)) |
542 | for e := range edges { |
543 | edgesSorted = append(edgesSorted, e) |
544 | } |
545 | sort.Strings(edgesSorted) |
546 | fmt.Fprintln(stdout, strings.Join(edgesSorted, "\n")) |
547 | |
548 | default: |
549 | return fmt.Errorf("no such command %q", cmd) |
550 | } |
551 | |
552 | return nil |
553 | } |
554 | |
555 | // -- Utilities -------------------------------------------------------- |
556 | |
557 | // split splits a line into words, which are generally separated by |
558 | // spaces, but Go-style double-quoted string literals are also supported. |
559 | // (This approximates the behaviour of the Bourne shell.) |
560 | // |
561 | // `one "two three"` -> ["one" "two three"] |
562 | // `a"\n"b` -> ["a\nb"] |
563 | func split(line string) ([]string, error) { |
564 | var ( |
565 | words []string |
566 | inWord bool |
567 | current bytes.Buffer |
568 | ) |
569 | |
570 | for len(line) > 0 { |
571 | r, size := utf8.DecodeRuneInString(line) |
572 | if unicode.IsSpace(r) { |
573 | if inWord { |
574 | words = append(words, current.String()) |
575 | current.Reset() |
576 | inWord = false |
577 | } |
578 | } else if r == '"' { |
579 | var ok bool |
580 | size, ok = quotedLength(line) |
581 | if !ok { |
582 | return nil, errors.New("invalid quotation") |
583 | } |
584 | s, err := strconv.Unquote(line[:size]) |
585 | if err != nil { |
586 | return nil, err |
587 | } |
588 | current.WriteString(s) |
589 | inWord = true |
590 | } else { |
591 | current.WriteRune(r) |
592 | inWord = true |
593 | } |
594 | line = line[size:] |
595 | } |
596 | if inWord { |
597 | words = append(words, current.String()) |
598 | } |
599 | return words, nil |
600 | } |
601 | |
602 | // quotedLength returns the length in bytes of the prefix of input that |
603 | // contain a possibly-valid double-quoted Go string literal. |
604 | // |
605 | // On success, n is at least two (""); input[:n] may be passed to |
606 | // strconv.Unquote to interpret its value, and input[n:] contains the |
607 | // rest of the input. |
608 | // |
609 | // On failure, quotedLength returns false, and the entire input can be |
610 | // passed to strconv.Unquote if an informative error message is desired. |
611 | // |
612 | // quotedLength does not and need not detect all errors, such as |
613 | // invalid hex or octal escape sequences, since it assumes |
614 | // strconv.Unquote will be applied to the prefix. It guarantees only |
615 | // that if there is a prefix of input containing a valid string literal, |
616 | // its length is returned. |
617 | // |
618 | // TODO(adonovan): move this into a strconv-like utility package. |
619 | func quotedLength(input string) (n int, ok bool) { |
620 | var offset int |
621 | |
622 | // next returns the rune at offset, or -1 on EOF. |
623 | // offset advances to just after that rune. |
624 | next := func() rune { |
625 | if offset < len(input) { |
626 | r, size := utf8.DecodeRuneInString(input[offset:]) |
627 | offset += size |
628 | return r |
629 | } |
630 | return -1 |
631 | } |
632 | |
633 | if next() != '"' { |
634 | return // error: not a quotation |
635 | } |
636 | |
637 | for { |
638 | r := next() |
639 | if r == '\n' || r < 0 { |
640 | return // error: string literal not terminated |
641 | } |
642 | if r == '"' { |
643 | return offset, true // success |
644 | } |
645 | if r == '\\' { |
646 | var skip int |
647 | switch next() { |
648 | case 'a', 'b', 'f', 'n', 'r', 't', 'v', '\\', '"': |
649 | skip = 0 |
650 | case '0', '1', '2', '3', '4', '5', '6', '7': |
651 | skip = 2 |
652 | case 'x': |
653 | skip = 2 |
654 | case 'u': |
655 | skip = 4 |
656 | case 'U': |
657 | skip = 8 |
658 | default: |
659 | return // error: invalid escape |
660 | } |
661 | |
662 | for i := 0; i < skip; i++ { |
663 | next() |
664 | } |
665 | } |
666 | } |
667 | } |
668 |
Members