gopls/cmd/digraph/digraph.go

1	// Copyright 2019 The Go Authors. All rights reserved.
2	// Use of this source code is governed by a BSD-style
3	// license that can be found in the LICENSE file.
4
5	/*
6	The digraph command performs queries over unlabelled directed graphs
7	represented in text form. It is intended to integrate nicely with
8	typical UNIX command pipelines.
9
10	Usage:
11
12	your-application \| digraph [command]
13
14	The support commands are:
15
16	nodes
17	the set of all nodes
18	degree
19	the in-degree and out-degree of each node
20	transpose
21	the reverse of the input edges
22	preds <node> ...
23	the set of immediate predecessors of the specified nodes
24	succs <node> ...
25	the set of immediate successors of the specified nodes
26	forward <node> ...
27	the set of nodes transitively reachable from the specified nodes
28	reverse <node> ...
29	the set of nodes that transitively reach the specified nodes
30	somepath <node> <node>
31	the list of nodes on some arbitrary path from the first node to the second
32	allpaths <node> <node>
33	the set of nodes on all paths from the first node to the second
34	sccs
35	all strongly connected components (one per line)
36	scc <node>
37	the set of nodes strongly connected to the specified one
38	focus <node>
39	the subgraph containing all directed paths that pass through the specified node
40
41	Input format:
42
43	Each line contains zero or more words. Words are separated by unquoted
44	whitespace; words may contain Go-style double-quoted portions, allowing spaces
45	and other characters to be expressed.
46
47	Each word declares a node, and if there are more than one, an edge from the
48	first to each subsequent one. The graph is provided on the standard input.
49
50	For instance, the following (acyclic) graph specifies a partial order among the
51	subtasks of getting dressed:
52
53	$ cat clothes.txt
54	socks shoes
55	"boxer shorts" pants
56	pants belt shoes
57	shirt tie sweater
58	sweater jacket
59	hat
60
61	The line "shirt tie sweater" indicates the two edges shirt -> tie and
62	shirt -> sweater, not shirt -> tie -> sweater.
63
64	Example usage:
65
66	Using digraph with existing Go tools:
67
68	$ go mod graph \| digraph nodes # Operate on the Go module graph.
69	$ go list -m all \| digraph nodes # Operate on the Go package graph.
70
71	Show the transitive closure of imports of the digraph tool itself:
72
73	$ go list -f '{{.ImportPath}} {{join .Imports " "}}' ... \| digraph forward golang.org/x/tools/cmd/digraph
74
75	Show which clothes (see above) must be donned before a jacket:
76
77	$ digraph reverse jacket
78	*/
79	package main // import "golang.org/x/tools/cmd/digraph"
80
81	// TODO(adonovan):
82	// - support input files other than stdin
83	// - support alternative formats (AT&T GraphViz, CSV, etc),
84	// a comment syntax, etc.
85	// - allow queries to nest, like Blaze query language.
86
87	import (
88	"bufio"
89	"bytes"
90	"errors"
91	"flag"
92	"fmt"
93	"io"
94	"os"
95	"sort"
96	"strconv"
97	"strings"
98	"unicode"
99	"unicode/utf8"
100	)
101
102	func usage() {
103	fmt.Fprintf(os.Stderr, `Usage: your-application \| digraph [command]
104
105	The support commands are:
106	nodes
107	the set of all nodes
108	degree
109	the in-degree and out-degree of each node
110	transpose
111	the reverse of the input edges
112	preds <node> ...
113	the set of immediate predecessors of the specified nodes
114	succs <node> ...
115	the set of immediate successors of the specified nodes
116	forward <node> ...
117	the set of nodes transitively reachable from the specified nodes
118	reverse <node> ...
119	the set of nodes that transitively reach the specified nodes
120	somepath <node> <node>
121	the list of nodes on some arbitrary path from the first node to the second
122	allpaths <node> <node>
123	the set of nodes on all paths from the first node to the second
124	sccs
125	all non-trivial strongly connected components, one per line
126	(single-node components are only printed for nodes with self-loops)
127	scc <node>
128	the set of nodes nodes strongly connected to the specified one
129	focus <node>
130	the subgraph containing all directed paths that pass through the specified node
131	`)
132	os.Exit(2)
133	}
134
135	func main() {
136	flag.Usage = usage
137	flag.Parse()
138
139	args := flag.Args()
140	if len(args) == 0 {
141	usage()
142	}
143
144	if err := digraph(args[0], args[1:]); err != nil {
145	fmt.Fprintf(os.Stderr, "digraph: %s\n", err)
146	os.Exit(1)
147	}
148	}
149
150	type nodelist []string
151
152	func (l nodelist) println(sep string) {
153	for i, node := range l {
154	if i > 0 {
155	fmt.Fprint(stdout, sep)
156	}
157	fmt.Fprint(stdout, node)
158	}
159	fmt.Fprintln(stdout)
160	}
161
162	type nodeset map[string]bool
163
164	func (s nodeset) sort() nodelist {
165	nodes := make(nodelist, len(s))
166	var i int
167	for node := range s {
168	nodes[i] = node
169	i++
170	}
171	sort.Strings(nodes)
172	return nodes
173	}
174
175	func (s nodeset) addAll(x nodeset) {
176	for node := range x {
177	s[node] = true
178	}
179	}
180
181	// A graph maps nodes to the non-nil set of their immediate successors.
182	type graph map[string]nodeset
183
184	func (g graph) addNode(node string) nodeset {
185	edges := g[node]
186	if edges == nil {
187	edges = make(nodeset)
188	g[node] = edges
189	}
190	return edges
191	}
192
193	func (g graph) addEdges(from string, to ...string) {
194	edges := g.addNode(from)
195	for _, to := range to {
196	g.addNode(to)
197	edges[to] = true
198	}
199	}
200
201	func (g graph) reachableFrom(roots nodeset) nodeset {
202	seen := make(nodeset)
203	var visit func(node string)
204	visit = func(node string) {
205	if !seen[node] {
206	seen[node] = true
207	for e := range g[node] {
208	visit(e)
209	}
210	}
211	}
212	for root := range roots {
213	visit(root)
214	}
215	return seen
216	}
217
218	func (g graph) transpose() graph {
219	rev := make(graph)
220	for node, edges := range g {
221	rev.addNode(node)
222	for succ := range edges {
223	rev.addEdges(succ, node)
224	}
225	}
226	return rev
227	}
228
229	func (g graph) sccs() []nodeset {
230	// Kosaraju's algorithm---Tarjan is overkill here.
231
232	// Forward pass.
233	S := make(nodelist, 0, len(g)) // postorder stack
234	seen := make(nodeset)
235	var visit func(node string)
236	visit = func(node string) {
237	if !seen[node] {
238	seen[node] = true
239	for e := range g[node] {
240	visit(e)
241	}
242	S = append(S, node)
243	}
244	}
245	for node := range g {
246	visit(node)
247	}
248
249	// Reverse pass.
250	rev := g.transpose()
251	var scc nodeset
252	seen = make(nodeset)
253	var rvisit func(node string)
254	rvisit = func(node string) {
255	if !seen[node] {
256	seen[node] = true
257	scc[node] = true
258	for e := range rev[node] {
259	rvisit(e)
260	}
261	}
262	}
263	var sccs []nodeset
264	for len(S) > 0 {
265	top := S[len(S)-1]
266	S = S[:len(S)-1] // pop
267	if !seen[top] {
268	scc = make(nodeset)
269	rvisit(top)
270	if len(scc) == 1 && !g[top][top] {
271	continue
272	}
273	sccs = append(sccs, scc)
274	}
275	}
276	return sccs
277	}
278
279	func (g graph) allpaths(from, to string) error {
280	// Mark all nodes to "to".
281	seen := make(nodeset) // value of seen[x] indicates whether x is on some path to "to"
282	var visit func(node string) bool
283	visit = func(node string) bool {
284	reachesTo, ok := seen[node]
285	if !ok {
286	reachesTo = node == to
287	seen[node] = reachesTo
288	for e := range g[node] {
289	if visit(e) {
290	reachesTo = true
291	}
292	}
293	if reachesTo && node != to {
294	seen[node] = true
295	}
296	}
297	return reachesTo
298	}
299	visit(from)
300
301	// For each marked node, collect its marked successors.
302	var edges []string
303	for n := range seen {
304	for succ := range g[n] {
305	if seen[succ] {
306	edges = append(edges, n+" "+succ)
307	}
308	}
309	}
310
311	// Sort (so that this method is deterministic) and print edges.
312	sort.Strings(edges)
313	for _, e := range edges {
314	fmt.Fprintln(stdout, e)
315	}
316
317	return nil
318	}
319
320	func (g graph) somepath(from, to string) error {
321	type edge struct{ from, to string }
322	seen := make(nodeset)
323	var dfs func(path []edge, from string) bool
324	dfs = func(path []edge, from string) bool {
325	if !seen[from] {
326	seen[from] = true
327	if from == to {
328	// fmt.Println(path, len(path), cap(path))
329	// Print and unwind.
330	for _, e := range path {
331	fmt.Fprintln(stdout, e.from+" "+e.to)
332	}
333	return true
334	}
335	for e := range g[from] {
336	if dfs(append(path, edge{from: from, to: e}), e) {
337	return true
338	}
339	}
340	}
341	return false
342	}
343	maxEdgesInGraph := len(g) * (len(g) - 1)
344	if !dfs(make([]edge, 0, maxEdgesInGraph), from) {
345	return fmt.Errorf("no path from %q to %q", from, to)
346	}
347	return nil
348	}
349
350	func parse(rd io.Reader) (graph, error) {
351	g := make(graph)
352
353	var linenum int
354	in := bufio.NewScanner(rd)
355	for in.Scan() {
356	linenum++
357	// Split into words, honoring double-quotes per Go spec.
358	words, err := split(in.Text())
359	if err != nil {
360	return nil, fmt.Errorf("at line %d: %v", linenum, err)
361	}
362	if len(words) > 0 {
363	g.addEdges(words[0], words[1:]...)
364	}
365	}
366	if err := in.Err(); err != nil {
367	return nil, err
368	}
369	return g, nil
370	}
371
372	// Overridable for redirection.
373	var stdin io.Reader = os.Stdin
374	var stdout io.Writer = os.Stdout
375
376	func digraph(cmd string, args []string) error {
377	// Parse the input graph.
378	g, err := parse(stdin)
379	if err != nil {
380	return err
381	}
382
383	// Parse the command line.
384	switch cmd {
385	case "nodes":
386	if len(args) != 0 {
387	return fmt.Errorf("usage: digraph nodes")
388	}
389	nodes := make(nodeset)
390	for node := range g {
391	nodes[node] = true
392	}
393	nodes.sort().println("\n")
394
395	case "degree":
396	if len(args) != 0 {
397	return fmt.Errorf("usage: digraph degree")
398	}
399	nodes := make(nodeset)
400	for node := range g {
401	nodes[node] = true
402	}
403	rev := g.transpose()
404	for _, node := range nodes.sort() {
405	fmt.Fprintf(stdout, "%d\t%d\t%s\n", len(rev[node]), len(g[node]), node)
406	}
407
408	case "transpose":
409	if len(args) != 0 {
410	return fmt.Errorf("usage: digraph transpose")
411	}
412	var revEdges []string
413	for node, succs := range g.transpose() {
414	for succ := range succs {
415	revEdges = append(revEdges, fmt.Sprintf("%s %s", node, succ))
416	}
417	}
418	sort.Strings(revEdges) // make output deterministic
419	for _, e := range revEdges {
420	fmt.Fprintln(stdout, e)
421	}
422
423	case "succs", "preds":
424	if len(args) == 0 {
425	return fmt.Errorf("usage: digraph %s <node> ... ", cmd)
426	}
427	g := g
428	if cmd == "preds" {
429	g = g.transpose()
430	}
431	result := make(nodeset)
432	for _, root := range args {
433	edges := g[root]
434	if edges == nil {
435	return fmt.Errorf("no such node %q", root)
436	}
437	result.addAll(edges)
438	}
439	result.sort().println("\n")
440
441	case "forward", "reverse":
442	if len(args) == 0 {
443	return fmt.Errorf("usage: digraph %s <node> ... ", cmd)
444	}
445	roots := make(nodeset)
446	for _, root := range args {
447	if g[root] == nil {
448	return fmt.Errorf("no such node %q", root)
449	}
450	roots[root] = true
451	}
452	g := g
453	if cmd == "reverse" {
454	g = g.transpose()
455	}
456	g.reachableFrom(roots).sort().println("\n")
457
458	case "somepath":
459	if len(args) != 2 {
460	return fmt.Errorf("usage: digraph somepath <from> <to>")
461	}
462	from, to := args[0], args[1]
463	if g[from] == nil {
464	return fmt.Errorf("no such 'from' node %q", from)
465	}
466	if g[to] == nil {
467	return fmt.Errorf("no such 'to' node %q", to)
468	}
469	if err := g.somepath(from, to); err != nil {
470	return err
471	}
472
473	case "allpaths":
474	if len(args) != 2 {
475	return fmt.Errorf("usage: digraph allpaths <from> <to>")
476	}
477	from, to := args[0], args[1]
478	if g[from] == nil {
479	return fmt.Errorf("no such 'from' node %q", from)
480	}
481	if g[to] == nil {
482	return fmt.Errorf("no such 'to' node %q", to)
483	}
484	if err := g.allpaths(from, to); err != nil {
485	return err
486	}
487
488	case "sccs":
489	if len(args) != 0 {
490	return fmt.Errorf("usage: digraph sccs")
491	}
492	buf := new(bytes.Buffer)
493	oldStdout := stdout
494	stdout = buf
495	for _, scc := range g.sccs() {
496	scc.sort().println(" ")
497	}
498	lines := strings.SplitAfter(buf.String(), "\n")
499	sort.Strings(lines)
500	stdout = oldStdout
501	io.WriteString(stdout, strings.Join(lines, ""))
502
503	case "scc":
504	if len(args) != 1 {
505	return fmt.Errorf("usage: digraph scc <node>")
506	}
507	node := args[0]
508	if g[node] == nil {
509	return fmt.Errorf("no such node %q", node)
510	}
511	for _, scc := range g.sccs() {
512	if scc[node] {
513	scc.sort().println("\n")
514	break
515	}
516	}
517
518	case "focus":
519	if len(args) != 1 {
520	return fmt.Errorf("usage: digraph focus <node>")
521	}
522	node := args[0]
523	if g[node] == nil {
524	return fmt.Errorf("no such node %q", node)
525	}
526
527	edges := make(map[string]struct{})
528	for from := range g.reachableFrom(nodeset{node: true}) {
529	for to := range g[from] {
530	edges[fmt.Sprintf("%s %s", from, to)] = struct{}{}
531	}
532	}
533
534	gtrans := g.transpose()
535	for from := range gtrans.reachableFrom(nodeset{node: true}) {
536	for to := range gtrans[from] {
537	edges[fmt.Sprintf("%s %s", to, from)] = struct{}{}
538	}
539	}
540
541	edgesSorted := make([]string, 0, len(edges))
542	for e := range edges {
543	edgesSorted = append(edgesSorted, e)
544	}
545	sort.Strings(edgesSorted)
546	fmt.Fprintln(stdout, strings.Join(edgesSorted, "\n"))
547
548	default:
549	return fmt.Errorf("no such command %q", cmd)
550	}
551
552	return nil
553	}
554
555	// -- Utilities --------------------------------------------------------
556
557	// split splits a line into words, which are generally separated by
558	// spaces, but Go-style double-quoted string literals are also supported.
559	// (This approximates the behaviour of the Bourne shell.)
560	//
561	// `one "two three"` -> ["one" "two three"]
562	// `a"\n"b` -> ["a\nb"]
563	func split(line string) ([]string, error) {
564	var (
565	words []string
566	inWord bool
567	current bytes.Buffer
568	)
569
570	for len(line) > 0 {
571	r, size := utf8.DecodeRuneInString(line)
572	if unicode.IsSpace(r) {
573	if inWord {
574	words = append(words, current.String())
575	current.Reset()
576	inWord = false
577	}
578	} else if r == '"' {
579	var ok bool
580	size, ok = quotedLength(line)
581	if !ok {
582	return nil, errors.New("invalid quotation")
583	}
584	s, err := strconv.Unquote(line[:size])
585	if err != nil {
586	return nil, err
587	}
588	current.WriteString(s)
589	inWord = true
590	} else {
591	current.WriteRune(r)
592	inWord = true
593	}
594	line = line[size:]
595	}
596	if inWord {
597	words = append(words, current.String())
598	}
599	return words, nil
600	}
601
602	// quotedLength returns the length in bytes of the prefix of input that
603	// contain a possibly-valid double-quoted Go string literal.
604	//
605	// On success, n is at least two (""); input[:n] may be passed to
606	// strconv.Unquote to interpret its value, and input[n:] contains the
607	// rest of the input.
608	//
609	// On failure, quotedLength returns false, and the entire input can be
610	// passed to strconv.Unquote if an informative error message is desired.
611	//
612	// quotedLength does not and need not detect all errors, such as
613	// invalid hex or octal escape sequences, since it assumes
614	// strconv.Unquote will be applied to the prefix. It guarantees only
615	// that if there is a prefix of input containing a valid string literal,
616	// its length is returned.
617	//
618	// TODO(adonovan): move this into a strconv-like utility package.
619	func quotedLength(input string) (n int, ok bool) {
620	var offset int
621
622	// next returns the rune at offset, or -1 on EOF.
623	// offset advances to just after that rune.
624	next := func() rune {
625	if offset < len(input) {
626	r, size := utf8.DecodeRuneInString(input[offset:])
627	offset += size
628	return r
629	}
630	return -1
631	}
632
633	if next() != '"' {
634	return // error: not a quotation
635	}
636
637	for {
638	r := next()
639	if r == '\n' \|\| r < 0 {
640	return // error: string literal not terminated
641	}
642	if r == '"' {
643	return offset, true // success
644	}
645	if r == '\\' {
646	var skip int
647	switch next() {
648	case 'a', 'b', 'f', 'n', 'r', 't', 'v', '\\', '"':
649	skip = 0
650	case '0', '1', '2', '3', '4', '5', '6', '7':
651	skip = 2
652	case 'x':
653	skip = 2
654	case 'u':
655	skip = 4
656	case 'U':
657	skip = 8
658	default:
659	return // error: invalid escape
660	}
661
662	for i := 0; i < skip; i++ {
663	next()
664	}
665	}
666	}
667	}
668